{ "metadata": { "artifact_id": "3890921f-a0f5-46bd-9688-2597c7e03ee7", "name": "mlperf-gpt-j-fp8", "timestamp": 1745456450, "furiosa_llm_version": "249c6f1", "furiosa_compiler_version": "b504d5d48" }, "model": { "generator_config": { "position_id_pad": 1, "buckets": [ { "batch_size": 1, "attention_size": 512, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 768, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 896, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 1024, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 1152, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 1280, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 1408, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 1536, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 1664, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 1920, "kv_cache_size": 0 }, { "batch_size": 64, "attention_size": 2048, "kv_cache_size": 2047 }, { "batch_size": 128, "attention_size": 2048, "kv_cache_size": 2047 } ], "model_qname": "furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM", "paged_attention_config": { "block_size": 1, "padding_block_idx": 0 }, "packing_type": "IDENTITY", "kv_cache_sharing_across_beams_config": { "beam_width": 4, "max_new_tokens": 128 }, "num_speculative_tokens": null, "unpadded_vocab_size": null }, "hf_config": { "vocab_size": 50401, "n_positions": 2048, "n_embd": 4096, "n_layer": 28, "n_head": 16, "n_inner": null, "rotary_dim": 64, "activation_function": "gelu_new", "resid_pdrop": 0.0, "embd_pdrop": 0.0, "attn_pdrop": 0.0, "layer_norm_epsilon": 0.00001, "initializer_range": 0.02, "use_cache": true, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": "float32", "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": false, "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": [ "GPTJForCausalLM" ], "finetuning_task": null, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "tokenizer_class": "GPT2Tokenizer", "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": { "text-generation": { "do_sample": true, "max_length": 50, "temperature": 1.0 } }, "problem_type": null, "_name_or_path": "furiosa-ai/mlperf-gpt-j-6b", "_attn_implementation_autoset": false, "transformers_version": "4.48.1", "gradient_checkpointing": false, "model_type": "gptj", "rotary": true, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true }, "model_metadata": { "pretrained_id": "furiosa-ai/mlperf-gpt-j-6b", "task_type": "text-generation", "llm_config": { "optimization_config": { "attention_type": "PAGED_ATTENTION", "optimize_rope": true, "optimize_packed": true, "decompose_layernorm": false, "optimize_furiosa": false, "use_unsplit_packed": false, "compact_causal_mask": false, "use_rngd_gelu": true, "causal_mask_free_decoding": true, "kv_cache_sharing_across_beams": true, "inbound_beamsearch_softmax": true, "calculate_logit_only_for_last_token": false, "optimized_for_speculative_decoding": false }, "quantization_config": { "weight": "fp8", "activation": "fp8", "kv_cache": "fp8", "use_mcp": true } }, "hf_configs": {}, "model_weight_path": null, "trust_remote_code": null, "allow_bfloat16_cast_with_mcp": true, "auto_bfloat16_cast": null }, "model_rewriting_config": { "do_decompositions_for_model_rewrite": false, "use_blockwise_compile": true, "embedding_layer_as_single_block": false, "num_blocks_per_supertask": 1, "embed_all_constants_into_graph": false, "optimize_logit_shape": true }, "parallel_config": { "tensor_parallel_size": 4, "pipeline_parallel_size": 1 }, "pipelines": [ { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn512", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 512 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 512 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 512, 512 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 512 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 512 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 512, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "3720b13705fc725d18a2226a5aa53054" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "6f21938bfb4069ed906a1da74dd57178" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "c43b1888e8cf33792e1d6887c3df03bd" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 512 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 512 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 512, 512 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 512 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 512 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 512 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 512 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 512 ], [ 0, 512 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 512 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 512 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "3720b13705fc725d18a2226a5aa53054": null, "6f21938bfb4069ed906a1da74dd57178": null, "c43b1888e8cf33792e1d6887c3df03bd": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn512-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn768", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 768 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 768 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 768, 768 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 768 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 768 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 768, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "14c1a2095eb9c3a506e9626f128a66cf" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "7448d72a5fda7547fe9ad2b6f3ad97c7" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "610270bc5c975f554829183d913e5bc7" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 768 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 768 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 768, 768 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 768 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 768 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 768 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 768 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 768 ], [ 0, 768 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 768 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 768 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "14c1a2095eb9c3a506e9626f128a66cf": null, "7448d72a5fda7547fe9ad2b6f3ad97c7": null, "610270bc5c975f554829183d913e5bc7": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn768-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn896", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 896 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 896 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 896, 896 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 896 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 896 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 896, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "bd08125c4282ab1fe0c8772d973554a8" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "25abc103ddf44d724373ff2f36432ec7" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "963ad0449b698838213ebd9cacee4ac0" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 896 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 896 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 896, 896 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 896 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 896 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 896 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 896 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 896 ], [ 0, 896 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 896 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 896 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "bd08125c4282ab1fe0c8772d973554a8": null, "25abc103ddf44d724373ff2f36432ec7": null, "963ad0449b698838213ebd9cacee4ac0": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn896-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1024", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 1024 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 1024 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 1024, 1024 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 1024 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 1024 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 1024, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "f3644e9896baca7db2dd9c9a3b41b527" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "6b908b290b8ca1a76889bc4b178b9856" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "fd9ffdee553b8d6e7fec5bbdead5da26" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 1024 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 1024 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 1024, 1024 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 1024 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 1024 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 1024 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 1024 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 1024 ], [ 0, 1024 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 1024 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 1024 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "f3644e9896baca7db2dd9c9a3b41b527": null, "6b908b290b8ca1a76889bc4b178b9856": null, "fd9ffdee553b8d6e7fec5bbdead5da26": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1024-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1152", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 1152 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 1152 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 1152, 1152 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 1152 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 1152 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 1152, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "2d1ff096fac5f7369d4fe0df06ea9350" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "1e2ed7604bb259726ca13f29add32333" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "9d656d7523cb06049b60978fcfda0ce6" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 1152 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 1152 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 1152, 1152 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 1152 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 1152 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 1152 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 1152 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 1152 ], [ 0, 1152 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 1152 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 1152 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "2d1ff096fac5f7369d4fe0df06ea9350": null, "1e2ed7604bb259726ca13f29add32333": null, "9d656d7523cb06049b60978fcfda0ce6": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1152-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1280", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 1280 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 1280 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 1280, 1280 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 1280 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 1280 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 1280, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "5b138d280525401763559084fb14da81" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "58d3a49ca251bd09d8a562ee46b36db4" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "1129bce53d3d1ee5a8f977a0806f1f46" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 1280 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 1280 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 1280, 1280 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 1280 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 1280 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 1280 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 1280 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 1280 ], [ 0, 1280 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 1280 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 1280 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "5b138d280525401763559084fb14da81": null, "58d3a49ca251bd09d8a562ee46b36db4": null, "1129bce53d3d1ee5a8f977a0806f1f46": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1280-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1408", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 1408 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 1408 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 1408, 1408 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 1408 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 1408 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 1408, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "137d10656805f6f8bc18c24d6f91d9c5" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "35e702f367369ea52eaa1340e9468777" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "48a2cfb22464b00526ea1aa027af6c0c" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 1408 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 1408 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 1408, 1408 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 1408 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 1408 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 1408 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 1408 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 1408 ], [ 0, 1408 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 1408 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 1408 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "137d10656805f6f8bc18c24d6f91d9c5": null, "35e702f367369ea52eaa1340e9468777": null, "48a2cfb22464b00526ea1aa027af6c0c": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1408-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1536", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 1536 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 1536 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 1536, 1536 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 1536 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 1536 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 1536, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "8410ae84799fc187448af43b3dc6a8d6" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "7109f6f7354167d3d4b1891a6214f639" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "6b29f1caa7f2cd692f042a87256a36e0" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 1536 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 1536 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 1536, 1536 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 1536 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 1536 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 1536 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 1536 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 1536 ], [ 0, 1536 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 1536 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 1536 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "8410ae84799fc187448af43b3dc6a8d6": null, "7109f6f7354167d3d4b1891a6214f639": null, "6b29f1caa7f2cd692f042a87256a36e0": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1536-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1664", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 1664 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 1664 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 1664, 1664 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 1664 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 1664 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 1664, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "8c154ca0f99ba80642cd90f0287ef820" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "c7f564baab300a5c527cd066d6e4c425" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "de2b0ec73c43d254f53398576cde487e" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 1664 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 1664 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 1664, 1664 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 1664 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 1664 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 1664 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 1664 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 1664 ], [ 0, 1664 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 1664 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 1664 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "8c154ca0f99ba80642cd90f0287ef820": null, "c7f564baab300a5c527cd066d6e4c425": null, "de2b0ec73c43d254f53398576cde487e": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1664-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1920", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant19_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant20_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant20_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant23_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant23_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant27_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant27_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant31_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant31_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant39_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_0_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_2_input_1_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_0_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant43_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_3_input_1_1_scale", "name_in_graph": "_tensor_constant43_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant45_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant45_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant48_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant48_r0", "placements": [] } }, "_tensor_constant49_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant49_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant52_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant52_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant56_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_9.QLV4_output_scale", "name_in_graph": "_tensor_constant56_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant60_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant60_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant63_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant63_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant68_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant69_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant72_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant72_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant78_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant78_r0", "placements": [] } }, "_tensor_constant79_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant83_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant85_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant85_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant86_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant87_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant99_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_0_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant101_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_6_input_1_1_scale", "name_in_graph": "_tensor_constant101_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_0_1_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant103_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_7_input_1_1_scale", "name_in_graph": "_tensor_constant103_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant105_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant108_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [] } }, "_tensor_constant109_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant110_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant110_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant112_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant113_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant115_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_19.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant126_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant127_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant128_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant129_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant130_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant130_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant132_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant132_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant135_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant138_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant138_r0", "placements": [] } }, "_tensor_constant139_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant139_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant140_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant143_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant143_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant145_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant146_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant147_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant147_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant150_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant150_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant156_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant157_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant158_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant159_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant159_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_0_1_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant161_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_10_input_1_1_scale", "name_in_graph": "_tensor_constant161_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_0_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_11_input_1_1_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant165_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant165_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant168_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant168_r0", "placements": [] } }, "_tensor_constant169_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant172_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant172_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant175_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant176_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_29.QLV4_output_scale", "name_in_graph": "_tensor_constant176_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant179_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant179_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant184_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant187_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant188_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant188_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant189_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant190_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant190_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant193_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant195_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant198_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant205_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant205_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant206_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant213_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant214_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant216_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant219_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant219_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant220_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_0_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_14_input_1_1_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant222_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_0_1_scale", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant223_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_15_input_1_1_scale", "name_in_graph": "_tensor_constant223_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant225_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant228_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [] } }, "_tensor_constant229_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant230_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant230_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant235_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant236_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_39.QLV4_output_scale", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant237_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant237_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant243_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant244_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant246_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant246_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant247_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant248_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant248_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant249_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant249_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant251_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant252_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant252_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant253_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant255_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant255_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant258_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant258_r0", "placements": [] } }, "_tensor_constant259_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant259_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant263_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant263_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant265_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant266_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant266_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant271_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant273_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant274_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant276_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant277_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant277_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant278_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant279_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant280_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_0_1_scale", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant281_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_18_input_1_1_scale", "name_in_graph": "_tensor_constant281_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant282_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_0_1_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant283_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_19_input_1_1_scale", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant285_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant288_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant288_r0", "placements": [] } }, "_tensor_constant289_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant292_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant292_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant295_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant295_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant296_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_49.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant301_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant303_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant304_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant304_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant306_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant306_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant307_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant307_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant308_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant310_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant310_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant311_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant312_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant313_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant313_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant318_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant325_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant326_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_54.QLV4_output_scale", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant329_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant331_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant333_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant333_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant334_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant336_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant337_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant339_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant339_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant340_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_0_1_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant341_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_22_input_1_1_scale", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant342_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_0_1_scale", "name_in_graph": "_tensor_constant342_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant343_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_23_input_1_1_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant345_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant348_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [] } }, "_tensor_constant349_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant350_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant350_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant353_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant353_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant355_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant356_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_59.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant357_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant359_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant361_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant363_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant364_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant364_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant366_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant368_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant368_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant370_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant371_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant371_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant372_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant375_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant375_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant378_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant378_r0", "placements": [] } }, "_tensor_constant379_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant379_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant382_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant382_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant385_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant386_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_64.QLV4_output_scale", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant387_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant389_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant391_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant391_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant393_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant393_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant394_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant394_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant397_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant397_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant400_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_0_1_scale", "name_in_graph": "_tensor_constant400_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant401_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_26_input_1_1_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_0_1_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant403_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_27_input_1_1_scale", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant405_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant408_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant408_r0", "placements": [] } }, "_tensor_constant409_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant415_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_69.QLV4_output_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant417_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant419_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant420_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant420_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant421_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant423_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant424_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant426_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant426_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant429_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant429_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant430_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant432_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant433_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant433_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant438_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [] } }, "_tensor_constant439_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant440_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant440_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant445_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_74.QLV4_output_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant447_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant449_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant449_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant451_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant451_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant453_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant458_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant458_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant459_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_0_1_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant461_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_30_input_1_1_scale", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant462_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_0_1_scale", "name_in_graph": "_tensor_constant462_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant463_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_31_input_1_1_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant465_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant468_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant468_r0", "placements": [] } }, "_tensor_constant469_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant469_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant475_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_79.QLV4_output_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant477_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant479_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant480_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant480_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant481_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant481_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant484_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant484_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant487_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant487_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant488_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant489_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant490_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant491_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant491_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant492_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant495_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant495_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant498_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant498_r0", "placements": [] } }, "_tensor_constant499_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant503_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant505_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_84.QLV4_output_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant507_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant507_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant509_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant509_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant510_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant511_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant513_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant513_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant516_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant516_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant517_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant518_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant519_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant520_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_0_1_scale", "name_in_graph": "_tensor_constant520_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant521_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_34_input_1_1_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_0_1_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant523_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_35_input_1_1_scale", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant525_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant528_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [] } }, "_tensor_constant529_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant532_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant533_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant535_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant536_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_89.QLV4_output_scale", "name_in_graph": "_tensor_constant536_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant537_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant539_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant546_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant547_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant549_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant549_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant550_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant552_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant553_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant553_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant555_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant558_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant558_r0", "placements": [] } }, "_tensor_constant559_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant562_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant563_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant565_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant565_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_94.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant567_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant567_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant569_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant571_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant571_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant574_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant574_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant576_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant578_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant578_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_0_1_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant581_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_38_input_1_1_scale", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant582_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_0_1_scale", "name_in_graph": "_tensor_constant582_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant583_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_39_input_1_1_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant585_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant585_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant588_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [] } }, "_tensor_constant589_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant590_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant592_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant593_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant595_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant596_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_99.QLV4_output_scale", "name_in_graph": "_tensor_constant596_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant597_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant597_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant600_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant600_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant603_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant603_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant604_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant607_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant607_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant610_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant611_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant611_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant612_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant613_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant615_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant618_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant618_r0", "placements": [] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant620_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant622_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant623_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant623_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant625_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant625_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant626_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_104.QLV4_output_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant627_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant629_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant629_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant633_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant634_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant636_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant636_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant640_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_0_1_scale", "name_in_graph": "_tensor_constant640_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant641_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_42_input_1_1_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant642_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_0_1_scale", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant643_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_43_input_1_1_scale", "name_in_graph": "_tensor_constant643_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant645_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant648_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant650_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant652_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant652_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant653_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant655_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_109.QLV4_output_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant661_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant661_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant663_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant669_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant669_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant670_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant671_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant672_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant672_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant673_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant675_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant680_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant682_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant683_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant683_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant685_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant686_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_114.QLV4_output_scale", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant687_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant687_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant690_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant690_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant691_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant694_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant694_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant698_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant698_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant700_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_0_1_scale", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant701_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_46_input_1_1_scale", "name_in_graph": "_tensor_constant701_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant702_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_0_1_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant703_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_47_input_1_1_scale", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant705_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant710_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant710_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant712_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant712_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant713_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant713_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant715_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant716_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_119.QLV4_output_scale", "name_in_graph": "_tensor_constant716_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant719_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant719_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant720_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant721_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant723_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant723_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant727_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant727_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant730_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant730_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant731_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant732_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant733_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [] } }, "_tensor_constant739_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant739_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant740_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant742_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant743_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant745_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant745_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant746_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_124.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant747_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant749_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant756_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant756_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant759_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant759_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant760_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_0_1_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant761_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_50_input_1_1_scale", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant762_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_0_1_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant763_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_51_input_1_1_scale", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant768_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant768_r0", "placements": [] } }, "_tensor_constant769_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant770_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant770_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant772_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant775_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant776_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_129.QLV4_output_scale", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant777_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant777_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant779_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant780_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant781_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant781_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant788_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant788_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant790_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant791_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant792_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant793_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant793_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant795_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant795_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant798_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant798_r0", "placements": [] } }, "_tensor_constant799_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant799_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant800_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant800_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant803_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant803_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant806_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_134.QLV4_output_scale", "name_in_graph": "_tensor_constant806_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant810_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant810_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant814_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant814_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant816_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant816_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant817_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant817_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant818_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant818_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant819_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant819_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant829_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant829_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant830_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant830_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant832_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant832_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant833_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant833_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant835_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant835_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant836_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "prefill_model.mul_139.QLV4_output_scale", "name_in_graph": "_tensor_constant836_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant837_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant837_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant839_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant839_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant840_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant840_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant841_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant841_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant843_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant843_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 1, 1920 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 1920 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 1920, 1920 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 1920 ], "dtype": "i32" }, "d0_arg4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 1, 1920 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 1, 1920, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 1, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1", "d0_arg4_1", "d0_arg5_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg3_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg4_1", "d0_arg1_1", "d0_arg5_1", "d0_arg2_1", "_tensor_constant19_r0", "_param_constant6_r0", "_tensor_constant20_r0", "_tensor_constant22_r0", "_param_constant7_r0", "_tensor_constant23_r0", "_tensor_constant25_r0", "_tensor_constant26_r0", "_param_constant8_r0", "_tensor_constant27_r0", "_tensor_constant29_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "ce6abc136515c0a87e2214d4b762465c" }, "3": { "kind": "edf", "inputs": [ "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant30_r0", "_param_constant11_r0", "_tensor_constant31_r0", "_tensor_constant33_r0", "_param_constant12_r0", "_tensor_constant34_r0", "_tensor_constant36_r0", "_param_constant13_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "_tensor_constant39_r0", "d0_arg60_1", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "_tensor_constant43_r0", "d0_arg0_1", "d0_arg6_1", "d0_arg1_1", "d0_arg7_1", "_tensor_constant45_r0", "d0_arg2_1", "_tensor_constant48_r0", "_tensor_constant49_r0", "_param_constant14_r0", "_tensor_constant50_r0", "_tensor_constant52_r0", "_param_constant15_r0", "_tensor_constant53_r0", "_tensor_constant55_r0", "_tensor_constant56_r0", "_param_constant16_r0", "_tensor_constant57_r0", "_tensor_constant59_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "4": { "kind": "edf", "inputs": [ "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant60_r0", "_param_constant19_r0", "_tensor_constant61_r0", "_tensor_constant63_r0", "_param_constant20_r0", "_tensor_constant64_r0", "_tensor_constant66_r0", "_param_constant21_r0", "_tensor_constant67_r0", "_tensor_constant68_r0", "_tensor_constant69_r0", "d0_arg60_1", "_tensor_constant70_r0", "_tensor_constant71_r0", "_tensor_constant72_r0", "_tensor_constant73_r0", "d0_arg0_1", "d0_arg8_1", "d0_arg1_1", "d0_arg9_1", "_tensor_constant75_r0", "d0_arg2_1", "_tensor_constant78_r0", "_tensor_constant79_r0", "_param_constant22_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_param_constant23_r0", "_tensor_constant83_r0", "_tensor_constant85_r0", "_tensor_constant86_r0", "_param_constant24_r0", "_tensor_constant87_r0", "_tensor_constant89_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "5": { "kind": "edf", "inputs": [ "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant90_r0", "_param_constant27_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant28_r0", "_tensor_constant94_r0", "_tensor_constant96_r0", "_param_constant29_r0", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "d0_arg60_1", "_tensor_constant100_r0", "_tensor_constant101_r0", "_tensor_constant102_r0", "_tensor_constant103_r0", "d0_arg0_1", "d0_arg10_1", "d0_arg1_1", "d0_arg11_1", "_tensor_constant105_r0", "d0_arg2_1", "_tensor_constant108_r0", "_tensor_constant109_r0", "_param_constant30_r0", "_tensor_constant110_r0", "_tensor_constant112_r0", "_param_constant31_r0", "_tensor_constant113_r0", "_tensor_constant115_r0", "_tensor_constant116_r0", "_param_constant32_r0", "_tensor_constant117_r0", "_tensor_constant119_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "6": { "kind": "edf", "inputs": [ "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant120_r0", "_param_constant35_r0", "_tensor_constant121_r0", "_tensor_constant123_r0", "_param_constant36_r0", "_tensor_constant124_r0", "_tensor_constant126_r0", "_param_constant37_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg60_1", "_tensor_constant130_r0", "_tensor_constant131_r0", "_tensor_constant132_r0", "_tensor_constant133_r0", "d0_arg0_1", "d0_arg12_1", "d0_arg1_1", "d0_arg13_1", "_tensor_constant135_r0", "d0_arg2_1", "_tensor_constant138_r0", "_tensor_constant139_r0", "_param_constant38_r0", "_tensor_constant140_r0", "_tensor_constant142_r0", "_param_constant39_r0", "_tensor_constant143_r0", "_tensor_constant145_r0", "_tensor_constant146_r0", "_param_constant40_r0", "_tensor_constant147_r0", "_tensor_constant149_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "7": { "kind": "edf", "inputs": [ "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant150_r0", "_param_constant43_r0", "_tensor_constant151_r0", "_tensor_constant153_r0", "_param_constant44_r0", "_tensor_constant154_r0", "_tensor_constant156_r0", "_param_constant45_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "_tensor_constant159_r0", "d0_arg60_1", "_tensor_constant160_r0", "_tensor_constant161_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "d0_arg0_1", "d0_arg14_1", "d0_arg1_1", "d0_arg15_1", "_tensor_constant165_r0", "d0_arg2_1", "_tensor_constant168_r0", "_tensor_constant169_r0", "_param_constant46_r0", "_tensor_constant170_r0", "_tensor_constant172_r0", "_param_constant47_r0", "_tensor_constant173_r0", "_tensor_constant175_r0", "_tensor_constant176_r0", "_param_constant48_r0", "_tensor_constant177_r0", "_tensor_constant179_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "8": { "kind": "edf", "inputs": [ "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant180_r0", "_param_constant51_r0", "_tensor_constant181_r0", "_tensor_constant183_r0", "_param_constant52_r0", "_tensor_constant184_r0", "_tensor_constant186_r0", "_param_constant53_r0", "_tensor_constant187_r0", "_tensor_constant188_r0", "_tensor_constant189_r0", "d0_arg60_1", "_tensor_constant190_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_tensor_constant193_r0", "d0_arg0_1", "d0_arg16_1", "d0_arg1_1", "d0_arg17_1", "_tensor_constant195_r0", "d0_arg2_1", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant54_r0", "_tensor_constant200_r0", "_tensor_constant202_r0", "_param_constant55_r0", "_tensor_constant203_r0", "_tensor_constant205_r0", "_tensor_constant206_r0", "_param_constant56_r0", "_tensor_constant207_r0", "_tensor_constant209_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "9": { "kind": "edf", "inputs": [ "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant210_r0", "_param_constant59_r0", "_tensor_constant211_r0", "_tensor_constant213_r0", "_param_constant60_r0", "_tensor_constant214_r0", "_tensor_constant216_r0", "_param_constant61_r0", "_tensor_constant217_r0", "_tensor_constant218_r0", "_tensor_constant219_r0", "d0_arg60_1", "_tensor_constant220_r0", "_tensor_constant221_r0", "_tensor_constant222_r0", "_tensor_constant223_r0", "d0_arg0_1", "d0_arg18_1", "d0_arg1_1", "d0_arg19_1", "_tensor_constant225_r0", "d0_arg2_1", "_tensor_constant228_r0", "_tensor_constant229_r0", "_param_constant62_r0", "_tensor_constant230_r0", "_tensor_constant232_r0", "_param_constant63_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_tensor_constant236_r0", "_param_constant64_r0", "_tensor_constant237_r0", "_tensor_constant239_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "10": { "kind": "edf", "inputs": [ "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant240_r0", "_param_constant67_r0", "_tensor_constant241_r0", "_tensor_constant243_r0", "_param_constant68_r0", "_tensor_constant244_r0", "_tensor_constant246_r0", "_param_constant69_r0", "_tensor_constant247_r0", "_tensor_constant248_r0", "_tensor_constant249_r0", "d0_arg60_1", "_tensor_constant250_r0", "_tensor_constant251_r0", "_tensor_constant252_r0", "_tensor_constant253_r0", "d0_arg0_1", "d0_arg20_1", "d0_arg1_1", "d0_arg21_1", "_tensor_constant255_r0", "d0_arg2_1", "_tensor_constant258_r0", "_tensor_constant259_r0", "_param_constant70_r0", "_tensor_constant260_r0", "_tensor_constant262_r0", "_param_constant71_r0", "_tensor_constant263_r0", "_tensor_constant265_r0", "_tensor_constant266_r0", "_param_constant72_r0", "_tensor_constant267_r0", "_tensor_constant269_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "11": { "kind": "edf", "inputs": [ "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant270_r0", "_param_constant75_r0", "_tensor_constant271_r0", "_tensor_constant273_r0", "_param_constant76_r0", "_tensor_constant274_r0", "_tensor_constant276_r0", "_param_constant77_r0", "_tensor_constant277_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "d0_arg60_1", "_tensor_constant280_r0", "_tensor_constant281_r0", "_tensor_constant282_r0", "_tensor_constant283_r0", "d0_arg0_1", "d0_arg22_1", "d0_arg1_1", "d0_arg23_1", "_tensor_constant285_r0", "d0_arg2_1", "_tensor_constant288_r0", "_tensor_constant289_r0", "_param_constant78_r0", "_tensor_constant290_r0", "_tensor_constant292_r0", "_param_constant79_r0", "_tensor_constant293_r0", "_tensor_constant295_r0", "_tensor_constant296_r0", "_param_constant80_r0", "_tensor_constant297_r0", "_tensor_constant299_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "12": { "kind": "edf", "inputs": [ "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant300_r0", "_param_constant83_r0", "_tensor_constant301_r0", "_tensor_constant303_r0", "_param_constant84_r0", "_tensor_constant304_r0", "_tensor_constant306_r0", "_param_constant85_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_tensor_constant309_r0", "d0_arg60_1", "_tensor_constant310_r0", "_tensor_constant311_r0", "_tensor_constant312_r0", "_tensor_constant313_r0", "d0_arg0_1", "d0_arg24_1", "d0_arg1_1", "d0_arg25_1", "_tensor_constant315_r0", "d0_arg2_1", "_tensor_constant318_r0", "_tensor_constant319_r0", "_param_constant86_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant87_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_tensor_constant326_r0", "_param_constant88_r0", "_tensor_constant327_r0", "_tensor_constant329_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "13": { "kind": "edf", "inputs": [ "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant330_r0", "_param_constant91_r0", "_tensor_constant331_r0", "_tensor_constant333_r0", "_param_constant92_r0", "_tensor_constant334_r0", "_tensor_constant336_r0", "_param_constant93_r0", "_tensor_constant337_r0", "_tensor_constant338_r0", "_tensor_constant339_r0", "d0_arg60_1", "_tensor_constant340_r0", "_tensor_constant341_r0", "_tensor_constant342_r0", "_tensor_constant343_r0", "d0_arg0_1", "d0_arg26_1", "d0_arg1_1", "d0_arg27_1", "_tensor_constant345_r0", "d0_arg2_1", "_tensor_constant348_r0", "_tensor_constant349_r0", "_param_constant94_r0", "_tensor_constant350_r0", "_tensor_constant352_r0", "_param_constant95_r0", "_tensor_constant353_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_param_constant96_r0", "_tensor_constant357_r0", "_tensor_constant359_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "14": { "kind": "edf", "inputs": [ "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant360_r0", "_param_constant99_r0", "_tensor_constant361_r0", "_tensor_constant363_r0", "_param_constant100_r0", "_tensor_constant364_r0", "_tensor_constant366_r0", "_param_constant101_r0", "_tensor_constant367_r0", "_tensor_constant368_r0", "_tensor_constant369_r0", "d0_arg60_1", "_tensor_constant370_r0", "_tensor_constant371_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "d0_arg0_1", "d0_arg28_1", "d0_arg1_1", "d0_arg29_1", "_tensor_constant375_r0", "d0_arg2_1", "_tensor_constant378_r0", "_tensor_constant379_r0", "_param_constant102_r0", "_tensor_constant380_r0", "_tensor_constant382_r0", "_param_constant103_r0", "_tensor_constant383_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "_param_constant104_r0", "_tensor_constant387_r0", "_tensor_constant389_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "15": { "kind": "edf", "inputs": [ "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant390_r0", "_param_constant107_r0", "_tensor_constant391_r0", "_tensor_constant393_r0", "_param_constant108_r0", "_tensor_constant394_r0", "_tensor_constant396_r0", "_param_constant109_r0", "_tensor_constant397_r0", "_tensor_constant398_r0", "_tensor_constant399_r0", "d0_arg60_1", "_tensor_constant400_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_tensor_constant403_r0", "d0_arg0_1", "d0_arg30_1", "d0_arg1_1", "d0_arg31_1", "_tensor_constant405_r0", "d0_arg2_1", "_tensor_constant408_r0", "_tensor_constant409_r0", "_param_constant110_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant111_r0", "_tensor_constant413_r0", "_tensor_constant415_r0", "_tensor_constant416_r0", "_param_constant112_r0", "_tensor_constant417_r0", "_tensor_constant419_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "16": { "kind": "edf", "inputs": [ "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant420_r0", "_param_constant115_r0", "_tensor_constant421_r0", "_tensor_constant423_r0", "_param_constant116_r0", "_tensor_constant424_r0", "_tensor_constant426_r0", "_param_constant117_r0", "_tensor_constant427_r0", "_tensor_constant428_r0", "_tensor_constant429_r0", "d0_arg60_1", "_tensor_constant430_r0", "_tensor_constant431_r0", "_tensor_constant432_r0", "_tensor_constant433_r0", "d0_arg0_1", "d0_arg32_1", "d0_arg1_1", "d0_arg33_1", "_tensor_constant435_r0", "d0_arg2_1", "_tensor_constant438_r0", "_tensor_constant439_r0", "_param_constant118_r0", "_tensor_constant440_r0", "_tensor_constant442_r0", "_param_constant119_r0", "_tensor_constant443_r0", "_tensor_constant445_r0", "_tensor_constant446_r0", "_param_constant120_r0", "_tensor_constant447_r0", "_tensor_constant449_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "17": { "kind": "edf", "inputs": [ "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant450_r0", "_param_constant123_r0", "_tensor_constant451_r0", "_tensor_constant453_r0", "_param_constant124_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant125_r0", "_tensor_constant457_r0", "_tensor_constant458_r0", "_tensor_constant459_r0", "d0_arg60_1", "_tensor_constant460_r0", "_tensor_constant461_r0", "_tensor_constant462_r0", "_tensor_constant463_r0", "d0_arg0_1", "d0_arg34_1", "d0_arg1_1", "d0_arg35_1", "_tensor_constant465_r0", "d0_arg2_1", "_tensor_constant468_r0", "_tensor_constant469_r0", "_param_constant126_r0", "_tensor_constant470_r0", "_tensor_constant472_r0", "_param_constant127_r0", "_tensor_constant473_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_param_constant128_r0", "_tensor_constant477_r0", "_tensor_constant479_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "18": { "kind": "edf", "inputs": [ "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant480_r0", "_param_constant131_r0", "_tensor_constant481_r0", "_tensor_constant483_r0", "_param_constant132_r0", "_tensor_constant484_r0", "_tensor_constant486_r0", "_param_constant133_r0", "_tensor_constant487_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "d0_arg60_1", "_tensor_constant490_r0", "_tensor_constant491_r0", "_tensor_constant492_r0", "_tensor_constant493_r0", "d0_arg0_1", "d0_arg36_1", "d0_arg1_1", "d0_arg37_1", "_tensor_constant495_r0", "d0_arg2_1", "_tensor_constant498_r0", "_tensor_constant499_r0", "_param_constant134_r0", "_tensor_constant500_r0", "_tensor_constant502_r0", "_param_constant135_r0", "_tensor_constant503_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "_param_constant136_r0", "_tensor_constant507_r0", "_tensor_constant509_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "19": { "kind": "edf", "inputs": [ "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant510_r0", "_param_constant139_r0", "_tensor_constant511_r0", "_tensor_constant513_r0", "_param_constant140_r0", "_tensor_constant514_r0", "_tensor_constant516_r0", "_param_constant141_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_tensor_constant519_r0", "d0_arg60_1", "_tensor_constant520_r0", "_tensor_constant521_r0", "_tensor_constant522_r0", "_tensor_constant523_r0", "d0_arg0_1", "d0_arg38_1", "d0_arg1_1", "d0_arg39_1", "_tensor_constant525_r0", "d0_arg2_1", "_tensor_constant528_r0", "_tensor_constant529_r0", "_param_constant142_r0", "_tensor_constant530_r0", "_tensor_constant532_r0", "_param_constant143_r0", "_tensor_constant533_r0", "_tensor_constant535_r0", "_tensor_constant536_r0", "_param_constant144_r0", "_tensor_constant537_r0", "_tensor_constant539_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "20": { "kind": "edf", "inputs": [ "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant540_r0", "_param_constant147_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant148_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_param_constant149_r0", "_tensor_constant547_r0", "_tensor_constant548_r0", "_tensor_constant549_r0", "d0_arg60_1", "_tensor_constant550_r0", "_tensor_constant551_r0", "_tensor_constant552_r0", "_tensor_constant553_r0", "d0_arg0_1", "d0_arg40_1", "d0_arg1_1", "d0_arg41_1", "_tensor_constant555_r0", "d0_arg2_1", "_tensor_constant558_r0", "_tensor_constant559_r0", "_param_constant150_r0", "_tensor_constant560_r0", "_tensor_constant562_r0", "_param_constant151_r0", "_tensor_constant563_r0", "_tensor_constant565_r0", "_tensor_constant566_r0", "_param_constant152_r0", "_tensor_constant567_r0", "_tensor_constant569_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "21": { "kind": "edf", "inputs": [ "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant570_r0", "_param_constant155_r0", "_tensor_constant571_r0", "_tensor_constant573_r0", "_param_constant156_r0", "_tensor_constant574_r0", "_tensor_constant576_r0", "_param_constant157_r0", "_tensor_constant577_r0", "_tensor_constant578_r0", "_tensor_constant579_r0", "d0_arg60_1", "_tensor_constant580_r0", "_tensor_constant581_r0", "_tensor_constant582_r0", "_tensor_constant583_r0", "d0_arg0_1", "d0_arg42_1", "d0_arg1_1", "d0_arg43_1", "_tensor_constant585_r0", "d0_arg2_1", "_tensor_constant588_r0", "_tensor_constant589_r0", "_param_constant158_r0", "_tensor_constant590_r0", "_tensor_constant592_r0", "_param_constant159_r0", "_tensor_constant593_r0", "_tensor_constant595_r0", "_tensor_constant596_r0", "_param_constant160_r0", "_tensor_constant597_r0", "_tensor_constant599_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "22": { "kind": "edf", "inputs": [ "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant600_r0", "_param_constant163_r0", "_tensor_constant601_r0", "_tensor_constant603_r0", "_param_constant164_r0", "_tensor_constant604_r0", "_tensor_constant606_r0", "_param_constant165_r0", "_tensor_constant607_r0", "_tensor_constant608_r0", "_tensor_constant609_r0", "d0_arg60_1", "_tensor_constant610_r0", "_tensor_constant611_r0", "_tensor_constant612_r0", "_tensor_constant613_r0", "d0_arg0_1", "d0_arg44_1", "d0_arg1_1", "d0_arg45_1", "_tensor_constant615_r0", "d0_arg2_1", "_tensor_constant618_r0", "_tensor_constant619_r0", "_param_constant166_r0", "_tensor_constant620_r0", "_tensor_constant622_r0", "_param_constant167_r0", "_tensor_constant623_r0", "_tensor_constant625_r0", "_tensor_constant626_r0", "_param_constant168_r0", "_tensor_constant627_r0", "_tensor_constant629_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "23": { "kind": "edf", "inputs": [ "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant630_r0", "_param_constant171_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_param_constant172_r0", "_tensor_constant634_r0", "_tensor_constant636_r0", "_param_constant173_r0", "_tensor_constant637_r0", "_tensor_constant638_r0", "_tensor_constant639_r0", "d0_arg60_1", "_tensor_constant640_r0", "_tensor_constant641_r0", "_tensor_constant642_r0", "_tensor_constant643_r0", "d0_arg0_1", "d0_arg46_1", "d0_arg1_1", "d0_arg47_1", "_tensor_constant645_r0", "d0_arg2_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_param_constant174_r0", "_tensor_constant650_r0", "_tensor_constant652_r0", "_param_constant175_r0", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant176_r0", "_tensor_constant657_r0", "_tensor_constant659_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "24": { "kind": "edf", "inputs": [ "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant660_r0", "_param_constant179_r0", "_tensor_constant661_r0", "_tensor_constant663_r0", "_param_constant180_r0", "_tensor_constant664_r0", "_tensor_constant666_r0", "_param_constant181_r0", "_tensor_constant667_r0", "_tensor_constant668_r0", "_tensor_constant669_r0", "d0_arg60_1", "_tensor_constant670_r0", "_tensor_constant671_r0", "_tensor_constant672_r0", "_tensor_constant673_r0", "d0_arg0_1", "d0_arg48_1", "d0_arg1_1", "d0_arg49_1", "_tensor_constant675_r0", "d0_arg2_1", "_tensor_constant678_r0", "_tensor_constant679_r0", "_param_constant182_r0", "_tensor_constant680_r0", "_tensor_constant682_r0", "_param_constant183_r0", "_tensor_constant683_r0", "_tensor_constant685_r0", "_tensor_constant686_r0", "_param_constant184_r0", "_tensor_constant687_r0", "_tensor_constant689_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "25": { "kind": "edf", "inputs": [ "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant690_r0", "_param_constant187_r0", "_tensor_constant691_r0", "_tensor_constant693_r0", "_param_constant188_r0", "_tensor_constant694_r0", "_tensor_constant696_r0", "_param_constant189_r0", "_tensor_constant697_r0", "_tensor_constant698_r0", "_tensor_constant699_r0", "d0_arg60_1", "_tensor_constant700_r0", "_tensor_constant701_r0", "_tensor_constant702_r0", "_tensor_constant703_r0", "d0_arg0_1", "d0_arg50_1", "d0_arg1_1", "d0_arg51_1", "_tensor_constant705_r0", "d0_arg2_1", "_tensor_constant708_r0", "_tensor_constant709_r0", "_param_constant190_r0", "_tensor_constant710_r0", "_tensor_constant712_r0", "_param_constant191_r0", "_tensor_constant713_r0", "_tensor_constant715_r0", "_tensor_constant716_r0", "_param_constant192_r0", "_tensor_constant717_r0", "_tensor_constant719_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "26": { "kind": "edf", "inputs": [ "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant720_r0", "_param_constant195_r0", "_tensor_constant721_r0", "_tensor_constant723_r0", "_param_constant196_r0", "_tensor_constant724_r0", "_tensor_constant726_r0", "_param_constant197_r0", "_tensor_constant727_r0", "_tensor_constant728_r0", "_tensor_constant729_r0", "d0_arg60_1", "_tensor_constant730_r0", "_tensor_constant731_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "d0_arg0_1", "d0_arg52_1", "d0_arg1_1", "d0_arg53_1", "_tensor_constant735_r0", "d0_arg2_1", "_tensor_constant738_r0", "_tensor_constant739_r0", "_param_constant198_r0", "_tensor_constant740_r0", "_tensor_constant742_r0", "_param_constant199_r0", "_tensor_constant743_r0", "_tensor_constant745_r0", "_tensor_constant746_r0", "_param_constant200_r0", "_tensor_constant747_r0", "_tensor_constant749_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "27": { "kind": "edf", "inputs": [ "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant750_r0", "_param_constant203_r0", "_tensor_constant751_r0", "_tensor_constant753_r0", "_param_constant204_r0", "_tensor_constant754_r0", "_tensor_constant756_r0", "_param_constant205_r0", "_tensor_constant757_r0", "_tensor_constant758_r0", "_tensor_constant759_r0", "d0_arg60_1", "_tensor_constant760_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg0_1", "d0_arg54_1", "d0_arg1_1", "d0_arg55_1", "_tensor_constant765_r0", "d0_arg2_1", "_tensor_constant768_r0", "_tensor_constant769_r0", "_param_constant206_r0", "_tensor_constant770_r0", "_tensor_constant772_r0", "_param_constant207_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_tensor_constant776_r0", "_param_constant208_r0", "_tensor_constant777_r0", "_tensor_constant779_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "28": { "kind": "edf", "inputs": [ "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant780_r0", "_param_constant211_r0", "_tensor_constant781_r0", "_tensor_constant783_r0", "_param_constant212_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant213_r0", "_tensor_constant787_r0", "_tensor_constant788_r0", "_tensor_constant789_r0", "d0_arg60_1", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "_tensor_constant793_r0", "d0_arg0_1", "d0_arg56_1", "d0_arg1_1", "d0_arg57_1", "_tensor_constant795_r0", "d0_arg2_1", "_tensor_constant798_r0", "_tensor_constant799_r0", "_param_constant214_r0", "_tensor_constant800_r0", "_tensor_constant802_r0", "_param_constant215_r0", "_tensor_constant803_r0", "_tensor_constant805_r0", "_tensor_constant806_r0", "_param_constant216_r0", "_tensor_constant807_r0", "_tensor_constant809_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "4bdbfe24d81eb397adad8bf7a1914f4d" }, "29": { "kind": "edf", "inputs": [ "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant810_r0", "_param_constant219_r0", "_tensor_constant811_r0", "_tensor_constant813_r0", "_param_constant220_r0", "_tensor_constant814_r0", "_tensor_constant816_r0", "_param_constant221_r0", "_tensor_constant817_r0", "_tensor_constant818_r0", "_tensor_constant819_r0", "d0_arg60_1", "d0_arg0_1", "d0_arg58_1", "d0_arg1_1", "d0_arg59_1", "d0_arg2_1", "_tensor_constant829_r0", "_param_constant222_r0", "_tensor_constant830_r0", "_tensor_constant832_r0", "_param_constant223_r0", "_tensor_constant833_r0", "_tensor_constant835_r0", "_tensor_constant836_r0", "_param_constant224_r0", "_tensor_constant837_r0", "_tensor_constant839_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant840_r0", "_param_constant227_r0", "_tensor_constant841_r0", "_tensor_constant843_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "f919d50f5fa4992d3ccbc407db54ec0b" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 1, 1920 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 1, 1920 ], "dtype": "i32", "idx": 1 }, "causal_mask": { "shape": [ 1, 1920, 1920 ], "dtype": "bool", "idx": 2 }, "input_ids": { "shape": [ 1, 1920 ], "dtype": "i32", "idx": 3 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 4 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 5 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 6 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "position_ids": { "shape": [ 1, 1920 ], "dtype": "i32", "idx": 60 } }, "outputs": { "logits": { "shape": [ 1, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 1920 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 1920 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 1920 ], [ 0, 1920 ] ], "origin": "causal_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 1920 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg4_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg5_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 1 ], [ 0, 1920 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "ce6abc136515c0a87e2214d4b762465c": null, "4bdbfe24d81eb397adad8bf7a1914f4d": null, "f919d50f5fa4992d3ccbc407db54ec0b": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv0-b1-attn1920-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv2047-b64-attn2048", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant18_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant18_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant19_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant21_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant21_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant24_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant24_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_5.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant28_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant28_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant32_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant32_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant35_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant35_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant39_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant44_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant44_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant46_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant46_r0", "placements": [] } }, "_tensor_constant47_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant47_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant48_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant48_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant51_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant51_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant54_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant54_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant58_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant58_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant62_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant62_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant68_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant69_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [] } }, "_tensor_constant76_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant76_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant77_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant77_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant79_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant82_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant83_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_15.QLV4_output_scale", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant84_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant84_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant86_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant87_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant88_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant88_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant95_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant95_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant97_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant98_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant99_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant104_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant104_r0", "placements": [] } }, "_tensor_constant105_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant108_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant109_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant111_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant111_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant112_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant115_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant125_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant125_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant126_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant127_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant128_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant129_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [] } }, "_tensor_constant134_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant134_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant135_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant138_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant138_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant140_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant141_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_25.QLV4_output_scale", "name_in_graph": "_tensor_constant141_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant144_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant144_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant145_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant148_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant148_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant152_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant152_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant155_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant155_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant156_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant157_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant158_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant164_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant164_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant166_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant166_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant167_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant167_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant169_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant170_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant171_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant171_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant174_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant174_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant175_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant182_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant182_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant184_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant185_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant185_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant186_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant187_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant189_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant195_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant196_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant196_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant198_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_35.QLV4_output_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant204_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant204_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant206_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant212_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant212_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant213_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant214_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant215_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant215_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant216_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant218_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant220_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant222_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant224_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant224_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant225_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant227_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant227_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant228_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant229_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant231_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant231_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant235_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant236_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant238_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant238_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant242_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant242_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant243_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant244_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant245_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant245_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant247_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant249_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant249_r0", "placements": [] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant251_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant253_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant254_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant254_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant256_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant256_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant257_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_45.QLV4_output_scale", "name_in_graph": "_tensor_constant257_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant258_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant258_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant261_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant261_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant264_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant264_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant265_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant268_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant268_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant271_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant272_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant272_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant273_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant274_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant276_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant278_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [] } }, "_tensor_constant279_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant280_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant282_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant283_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant285_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant286_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant286_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant287_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant287_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant289_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant291_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant291_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant294_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant294_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant296_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant298_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant298_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant301_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant302_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant302_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant303_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant305_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant305_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant307_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant307_r0", "placements": [] } }, "_tensor_constant308_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant311_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant312_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant314_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant314_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_55.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant316_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant316_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant318_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant325_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant326_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant328_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant328_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant329_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant331_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant332_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant332_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant334_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant336_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [] } }, "_tensor_constant337_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant340_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant341_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant343_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant344_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_60.QLV4_output_scale", "name_in_graph": "_tensor_constant344_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant345_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant347_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant347_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant348_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant349_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant351_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant351_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant354_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant354_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant355_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant356_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant357_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant358_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant358_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant359_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant361_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant363_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant365_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant365_r0", "placements": [] } }, "_tensor_constant366_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant370_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant372_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_65.QLV4_output_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant374_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant374_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant376_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant376_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant377_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant377_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant378_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant378_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant381_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant381_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant384_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant384_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant385_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant386_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant387_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant388_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant388_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant389_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant392_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant392_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant394_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant394_r0", "placements": [] } }, "_tensor_constant395_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant395_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant401_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_70.QLV4_output_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant403_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant405_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant406_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant406_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant407_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant407_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant409_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant414_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant414_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant415_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_56_input_0_1_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant417_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_56_input_1_1_scale", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant418_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_57_input_0_1_scale", "name_in_graph": "_tensor_constant418_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant419_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_57_input_1_1_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant421_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant423_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [] } }, "_tensor_constant424_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant425_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant425_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant430_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_75.QLV4_output_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant432_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant434_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant434_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant436_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant436_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant438_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant439_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant441_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant441_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant444_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant444_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant445_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_60_input_0_1_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_60_input_1_1_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant447_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_61_input_0_1_scale", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant448_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_61_input_1_1_scale", "name_in_graph": "_tensor_constant448_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant452_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant452_r0", "placements": [] } }, "_tensor_constant453_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant459_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_80.QLV4_output_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant461_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant463_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant464_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant464_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant465_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant467_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant467_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant468_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant468_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant471_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant471_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant474_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_64_input_0_1_scale", "name_in_graph": "_tensor_constant474_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant475_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_64_input_1_1_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_65_input_0_1_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant477_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_65_input_1_1_scale", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant479_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant481_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant481_r0", "placements": [] } }, "_tensor_constant482_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant482_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant485_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant485_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant488_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant489_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_85.QLV4_output_scale", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant490_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant492_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant494_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant494_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant496_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant496_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant497_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant497_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant499_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant501_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant501_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant503_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_68_input_0_1_scale", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant504_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_68_input_1_1_scale", "name_in_graph": "_tensor_constant504_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant505_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_69_input_0_1_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_69_input_1_1_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant508_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant508_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant510_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [] } }, "_tensor_constant511_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant512_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant512_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant515_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant515_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant517_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant518_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_90.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant519_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant521_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant523_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant525_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant526_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant526_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant528_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant529_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant531_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant531_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant532_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_72_input_0_1_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant533_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_72_input_1_1_scale", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant534_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_73_input_0_1_scale", "name_in_graph": "_tensor_constant534_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant535_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_73_input_1_1_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant537_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant539_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant546_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant547_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_95.QLV4_output_scale", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant550_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant552_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant554_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant554_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant555_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant557_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant557_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant558_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant558_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant559_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant561_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_76_input_0_1_scale", "name_in_graph": "_tensor_constant561_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant562_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_76_input_1_1_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant563_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_77_input_0_1_scale", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant564_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_77_input_1_1_scale", "name_in_graph": "_tensor_constant564_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant568_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant568_r0", "placements": [] } }, "_tensor_constant569_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant572_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant572_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant575_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant575_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant576_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_100.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant581_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant583_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant584_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant584_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant586_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant586_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant587_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant587_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant588_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant589_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant590_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_80_input_0_1_scale", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant591_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_80_input_1_1_scale", "name_in_graph": "_tensor_constant591_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant592_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_81_input_0_1_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant593_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_81_input_1_1_scale", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant595_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant597_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant597_r0", "placements": [] } }, "_tensor_constant598_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant598_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant602_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant602_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant604_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant605_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_105.QLV4_output_scale", "name_in_graph": "_tensor_constant605_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant610_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant612_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant613_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant615_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant616_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant616_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant617_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant617_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant618_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant618_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_84_input_0_1_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant620_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_84_input_1_1_scale", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant621_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_85_input_0_1_scale", "name_in_graph": "_tensor_constant621_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant622_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_85_input_1_1_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant624_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant624_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant626_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [] } }, "_tensor_constant627_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant628_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant628_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant633_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant634_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_110.QLV4_output_scale", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant635_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant635_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant641_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant642_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant644_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant644_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant645_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant646_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant646_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant647_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant647_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant648_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_88_input_0_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_88_input_1_1_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant650_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_89_input_0_1_scale", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant651_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_89_input_1_1_scale", "name_in_graph": "_tensor_constant651_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant653_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant655_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant662_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant662_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant663_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_115.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant670_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant671_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant673_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant674_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant674_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant675_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant676_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant676_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant677_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_92_input_0_1_scale", "name_in_graph": "_tensor_constant677_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_92_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_93_input_0_1_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant680_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_93_input_1_1_scale", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant682_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant684_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant684_r0", "placements": [] } }, "_tensor_constant685_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant686_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant688_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant688_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant691_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant692_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_120.QLV4_output_scale", "name_in_graph": "_tensor_constant692_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant695_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant695_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant700_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant702_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant703_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant704_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant704_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant705_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant706_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_96_input_0_1_scale", "name_in_graph": "_tensor_constant706_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant707_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_96_input_1_1_scale", "name_in_graph": "_tensor_constant707_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_97_input_0_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_97_input_1_1_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant711_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant711_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant713_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant713_r0", "placements": [] } }, "_tensor_constant714_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant714_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant715_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant718_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant718_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant720_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant721_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_125.QLV4_output_scale", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant722_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant722_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant725_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant725_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant731_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant732_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant733_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant734_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant734_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_100_input_0_1_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant736_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_100_input_1_1_scale", "name_in_graph": "_tensor_constant736_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant737_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_101_input_0_1_scale", "name_in_graph": "_tensor_constant737_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_101_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant740_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant742_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [] } }, "_tensor_constant743_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant744_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant744_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant746_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant747_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant749_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_130.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant755_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant755_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant760_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant761_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant762_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant763_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant764_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_104_input_0_1_scale", "name_in_graph": "_tensor_constant764_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_104_input_1_1_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant766_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_105_input_0_1_scale", "name_in_graph": "_tensor_constant766_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant767_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_105_input_1_1_scale", "name_in_graph": "_tensor_constant767_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant769_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant771_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant771_r0", "placements": [] } }, "_tensor_constant772_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant775_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant776_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant778_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant778_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant779_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_135.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant780_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant782_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant782_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant790_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant791_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant792_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant801_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant801_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant804_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant804_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant808_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_140.QLV4_output_scale", "name_in_graph": "_tensor_constant808_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant812_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant812_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant815_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant815_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 64, 1 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 64, 1 ], "dtype": "i32" }, "d0_arg6_1": { "shape": [ 30720 ], "dtype": "i32" }, "d0_arg7_1": { "shape": [ 8128 ], "dtype": "i32" }, "d0_arg8_1": { "shape": [ 30720 ], "dtype": "i32" }, "d0_arg9_1": { "shape": [ 8128 ], "dtype": "i32" }, "d0_arg10_1": { "shape": [ 64, 1 ], "dtype": "i32" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg61_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg62_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg63_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg64_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg65_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg66_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg67_1": { "shape": [ 64, 2048 ], "dtype": "bool" }, "d0_arg68_1": { "shape": [ 64, 1 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 64, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 64, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1", "d0_arg61_1", "d0_arg62_1", "d0_arg63_1", "d0_arg64_1", "d0_arg65_1", "d0_arg66_1", "d0_arg67_1", "d0_arg68_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg10_1", "d0_arg67_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg68_1", "d0_arg11_1", "d0_arg6_1", "d0_arg12_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant18_r0", "_param_constant6_r0", "_tensor_constant19_r0", "_tensor_constant21_r0", "_param_constant7_r0", "_tensor_constant22_r0", "_tensor_constant24_r0", "_tensor_constant25_r0", "_param_constant8_r0", "_tensor_constant26_r0", "_tensor_constant28_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "1ed55b4780a08696d73ce7879b62802d" }, "3": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant29_r0", "_param_constant11_r0", "_tensor_constant30_r0", "_tensor_constant32_r0", "_param_constant12_r0", "_tensor_constant33_r0", "_tensor_constant35_r0", "_param_constant13_r0", "_tensor_constant36_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "d0_arg68_1", "_tensor_constant39_r0", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "d0_arg13_1", "d0_arg6_1", "d0_arg14_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant44_r0", "_tensor_constant46_r0", "_tensor_constant47_r0", "_param_constant14_r0", "_tensor_constant48_r0", "_tensor_constant50_r0", "_param_constant15_r0", "_tensor_constant51_r0", "_tensor_constant53_r0", "_tensor_constant54_r0", "_param_constant16_r0", "_tensor_constant55_r0", "_tensor_constant57_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "4": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant58_r0", "_param_constant19_r0", "_tensor_constant59_r0", "_tensor_constant61_r0", "_param_constant20_r0", "_tensor_constant62_r0", "_tensor_constant64_r0", "_param_constant21_r0", "_tensor_constant65_r0", "_tensor_constant66_r0", "_tensor_constant67_r0", "d0_arg68_1", "_tensor_constant68_r0", "_tensor_constant69_r0", "_tensor_constant70_r0", "_tensor_constant71_r0", "d0_arg15_1", "d0_arg6_1", "d0_arg16_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant73_r0", "_tensor_constant75_r0", "_tensor_constant76_r0", "_param_constant22_r0", "_tensor_constant77_r0", "_tensor_constant79_r0", "_param_constant23_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_tensor_constant83_r0", "_param_constant24_r0", "_tensor_constant84_r0", "_tensor_constant86_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "5": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant87_r0", "_param_constant27_r0", "_tensor_constant88_r0", "_tensor_constant90_r0", "_param_constant28_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant29_r0", "_tensor_constant94_r0", "_tensor_constant95_r0", "_tensor_constant96_r0", "d0_arg68_1", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "_tensor_constant100_r0", "d0_arg17_1", "d0_arg6_1", "d0_arg18_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant102_r0", "_tensor_constant104_r0", "_tensor_constant105_r0", "_param_constant30_r0", "_tensor_constant106_r0", "_tensor_constant108_r0", "_param_constant31_r0", "_tensor_constant109_r0", "_tensor_constant111_r0", "_tensor_constant112_r0", "_param_constant32_r0", "_tensor_constant113_r0", "_tensor_constant115_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "6": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant116_r0", "_param_constant35_r0", "_tensor_constant117_r0", "_tensor_constant119_r0", "_param_constant36_r0", "_tensor_constant120_r0", "_tensor_constant122_r0", "_param_constant37_r0", "_tensor_constant123_r0", "_tensor_constant124_r0", "_tensor_constant125_r0", "d0_arg68_1", "_tensor_constant126_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg19_1", "d0_arg6_1", "d0_arg20_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant131_r0", "_tensor_constant133_r0", "_tensor_constant134_r0", "_param_constant38_r0", "_tensor_constant135_r0", "_tensor_constant137_r0", "_param_constant39_r0", "_tensor_constant138_r0", "_tensor_constant140_r0", "_tensor_constant141_r0", "_param_constant40_r0", "_tensor_constant142_r0", "_tensor_constant144_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "7": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant145_r0", "_param_constant43_r0", "_tensor_constant146_r0", "_tensor_constant148_r0", "_param_constant44_r0", "_tensor_constant149_r0", "_tensor_constant151_r0", "_param_constant45_r0", "_tensor_constant152_r0", "_tensor_constant153_r0", "_tensor_constant154_r0", "d0_arg68_1", "_tensor_constant155_r0", "_tensor_constant156_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "d0_arg21_1", "d0_arg6_1", "d0_arg22_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant160_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "_param_constant46_r0", "_tensor_constant164_r0", "_tensor_constant166_r0", "_param_constant47_r0", "_tensor_constant167_r0", "_tensor_constant169_r0", "_tensor_constant170_r0", "_param_constant48_r0", "_tensor_constant171_r0", "_tensor_constant173_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "8": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant174_r0", "_param_constant51_r0", "_tensor_constant175_r0", "_tensor_constant177_r0", "_param_constant52_r0", "_tensor_constant178_r0", "_tensor_constant180_r0", "_param_constant53_r0", "_tensor_constant181_r0", "_tensor_constant182_r0", "_tensor_constant183_r0", "d0_arg68_1", "_tensor_constant184_r0", "_tensor_constant185_r0", "_tensor_constant186_r0", "_tensor_constant187_r0", "d0_arg23_1", "d0_arg6_1", "d0_arg24_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant189_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_param_constant54_r0", "_tensor_constant193_r0", "_tensor_constant195_r0", "_param_constant55_r0", "_tensor_constant196_r0", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant56_r0", "_tensor_constant200_r0", "_tensor_constant202_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "9": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant203_r0", "_param_constant59_r0", "_tensor_constant204_r0", "_tensor_constant206_r0", "_param_constant60_r0", "_tensor_constant207_r0", "_tensor_constant209_r0", "_param_constant61_r0", "_tensor_constant210_r0", "_tensor_constant211_r0", "_tensor_constant212_r0", "d0_arg68_1", "_tensor_constant213_r0", "_tensor_constant214_r0", "_tensor_constant215_r0", "_tensor_constant216_r0", "d0_arg25_1", "d0_arg6_1", "d0_arg26_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant218_r0", "_tensor_constant220_r0", "_tensor_constant221_r0", "_param_constant62_r0", "_tensor_constant222_r0", "_tensor_constant224_r0", "_param_constant63_r0", "_tensor_constant225_r0", "_tensor_constant227_r0", "_tensor_constant228_r0", "_param_constant64_r0", "_tensor_constant229_r0", "_tensor_constant231_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "10": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant232_r0", "_param_constant67_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_param_constant68_r0", "_tensor_constant236_r0", "_tensor_constant238_r0", "_param_constant69_r0", "_tensor_constant239_r0", "_tensor_constant240_r0", "_tensor_constant241_r0", "d0_arg68_1", "_tensor_constant242_r0", "_tensor_constant243_r0", "_tensor_constant244_r0", "_tensor_constant245_r0", "d0_arg27_1", "d0_arg6_1", "d0_arg28_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant247_r0", "_tensor_constant249_r0", "_tensor_constant250_r0", "_param_constant70_r0", "_tensor_constant251_r0", "_tensor_constant253_r0", "_param_constant71_r0", "_tensor_constant254_r0", "_tensor_constant256_r0", "_tensor_constant257_r0", "_param_constant72_r0", "_tensor_constant258_r0", "_tensor_constant260_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "11": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant261_r0", "_param_constant75_r0", "_tensor_constant262_r0", "_tensor_constant264_r0", "_param_constant76_r0", "_tensor_constant265_r0", "_tensor_constant267_r0", "_param_constant77_r0", "_tensor_constant268_r0", "_tensor_constant269_r0", "_tensor_constant270_r0", "d0_arg68_1", "_tensor_constant271_r0", "_tensor_constant272_r0", "_tensor_constant273_r0", "_tensor_constant274_r0", "d0_arg29_1", "d0_arg6_1", "d0_arg30_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant276_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "_param_constant78_r0", "_tensor_constant280_r0", "_tensor_constant282_r0", "_param_constant79_r0", "_tensor_constant283_r0", "_tensor_constant285_r0", "_tensor_constant286_r0", "_param_constant80_r0", "_tensor_constant287_r0", "_tensor_constant289_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "12": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant290_r0", "_param_constant83_r0", "_tensor_constant291_r0", "_tensor_constant293_r0", "_param_constant84_r0", "_tensor_constant294_r0", "_tensor_constant296_r0", "_param_constant85_r0", "_tensor_constant297_r0", "_tensor_constant298_r0", "_tensor_constant299_r0", "d0_arg68_1", "_tensor_constant300_r0", "_tensor_constant301_r0", "_tensor_constant302_r0", "_tensor_constant303_r0", "d0_arg31_1", "d0_arg6_1", "d0_arg32_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant305_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_param_constant86_r0", "_tensor_constant309_r0", "_tensor_constant311_r0", "_param_constant87_r0", "_tensor_constant312_r0", "_tensor_constant314_r0", "_tensor_constant315_r0", "_param_constant88_r0", "_tensor_constant316_r0", "_tensor_constant318_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "13": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant319_r0", "_param_constant91_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant92_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_param_constant93_r0", "_tensor_constant326_r0", "_tensor_constant327_r0", "_tensor_constant328_r0", "d0_arg68_1", "_tensor_constant329_r0", "_tensor_constant330_r0", "_tensor_constant331_r0", "_tensor_constant332_r0", "d0_arg33_1", "d0_arg6_1", "d0_arg34_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant334_r0", "_tensor_constant336_r0", "_tensor_constant337_r0", "_param_constant94_r0", "_tensor_constant338_r0", "_tensor_constant340_r0", "_param_constant95_r0", "_tensor_constant341_r0", "_tensor_constant343_r0", "_tensor_constant344_r0", "_param_constant96_r0", "_tensor_constant345_r0", "_tensor_constant347_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "14": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant348_r0", "_param_constant99_r0", "_tensor_constant349_r0", "_tensor_constant351_r0", "_param_constant100_r0", "_tensor_constant352_r0", "_tensor_constant354_r0", "_param_constant101_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_tensor_constant357_r0", "d0_arg68_1", "_tensor_constant358_r0", "_tensor_constant359_r0", "_tensor_constant360_r0", "_tensor_constant361_r0", "d0_arg35_1", "d0_arg6_1", "d0_arg36_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant363_r0", "_tensor_constant365_r0", "_tensor_constant366_r0", "_param_constant102_r0", "_tensor_constant367_r0", "_tensor_constant369_r0", "_param_constant103_r0", "_tensor_constant370_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "_param_constant104_r0", "_tensor_constant374_r0", "_tensor_constant376_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "15": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant377_r0", "_param_constant107_r0", "_tensor_constant378_r0", "_tensor_constant380_r0", "_param_constant108_r0", "_tensor_constant381_r0", "_tensor_constant383_r0", "_param_constant109_r0", "_tensor_constant384_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "d0_arg68_1", "_tensor_constant387_r0", "_tensor_constant388_r0", "_tensor_constant389_r0", "_tensor_constant390_r0", "d0_arg37_1", "d0_arg6_1", "d0_arg38_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant392_r0", "_tensor_constant394_r0", "_tensor_constant395_r0", "_param_constant110_r0", "_tensor_constant396_r0", "_tensor_constant398_r0", "_param_constant111_r0", "_tensor_constant399_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_param_constant112_r0", "_tensor_constant403_r0", "_tensor_constant405_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "16": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant406_r0", "_param_constant115_r0", "_tensor_constant407_r0", "_tensor_constant409_r0", "_param_constant116_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant117_r0", "_tensor_constant413_r0", "_tensor_constant414_r0", "_tensor_constant415_r0", "d0_arg68_1", "_tensor_constant416_r0", "_tensor_constant417_r0", "_tensor_constant418_r0", "_tensor_constant419_r0", "d0_arg39_1", "d0_arg6_1", "d0_arg40_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant421_r0", "_tensor_constant423_r0", "_tensor_constant424_r0", "_param_constant118_r0", "_tensor_constant425_r0", "_tensor_constant427_r0", "_param_constant119_r0", "_tensor_constant428_r0", "_tensor_constant430_r0", "_tensor_constant431_r0", "_param_constant120_r0", "_tensor_constant432_r0", "_tensor_constant434_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "17": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant435_r0", "_param_constant123_r0", "_tensor_constant436_r0", "_tensor_constant438_r0", "_param_constant124_r0", "_tensor_constant439_r0", "_tensor_constant441_r0", "_param_constant125_r0", "_tensor_constant442_r0", "_tensor_constant443_r0", "_tensor_constant444_r0", "d0_arg68_1", "_tensor_constant445_r0", "_tensor_constant446_r0", "_tensor_constant447_r0", "_tensor_constant448_r0", "d0_arg41_1", "d0_arg6_1", "d0_arg42_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant450_r0", "_tensor_constant452_r0", "_tensor_constant453_r0", "_param_constant126_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant127_r0", "_tensor_constant457_r0", "_tensor_constant459_r0", "_tensor_constant460_r0", "_param_constant128_r0", "_tensor_constant461_r0", "_tensor_constant463_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "18": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant464_r0", "_param_constant131_r0", "_tensor_constant465_r0", "_tensor_constant467_r0", "_param_constant132_r0", "_tensor_constant468_r0", "_tensor_constant470_r0", "_param_constant133_r0", "_tensor_constant471_r0", "_tensor_constant472_r0", "_tensor_constant473_r0", "d0_arg68_1", "_tensor_constant474_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_tensor_constant477_r0", "d0_arg43_1", "d0_arg6_1", "d0_arg44_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant479_r0", "_tensor_constant481_r0", "_tensor_constant482_r0", "_param_constant134_r0", "_tensor_constant483_r0", "_tensor_constant485_r0", "_param_constant135_r0", "_tensor_constant486_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "_param_constant136_r0", "_tensor_constant490_r0", "_tensor_constant492_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "19": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant493_r0", "_param_constant139_r0", "_tensor_constant494_r0", "_tensor_constant496_r0", "_param_constant140_r0", "_tensor_constant497_r0", "_tensor_constant499_r0", "_param_constant141_r0", "_tensor_constant500_r0", "_tensor_constant501_r0", "_tensor_constant502_r0", "d0_arg68_1", "_tensor_constant503_r0", "_tensor_constant504_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "d0_arg45_1", "d0_arg6_1", "d0_arg46_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant508_r0", "_tensor_constant510_r0", "_tensor_constant511_r0", "_param_constant142_r0", "_tensor_constant512_r0", "_tensor_constant514_r0", "_param_constant143_r0", "_tensor_constant515_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_param_constant144_r0", "_tensor_constant519_r0", "_tensor_constant521_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "20": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant522_r0", "_param_constant147_r0", "_tensor_constant523_r0", "_tensor_constant525_r0", "_param_constant148_r0", "_tensor_constant526_r0", "_tensor_constant528_r0", "_param_constant149_r0", "_tensor_constant529_r0", "_tensor_constant530_r0", "_tensor_constant531_r0", "d0_arg68_1", "_tensor_constant532_r0", "_tensor_constant533_r0", "_tensor_constant534_r0", "_tensor_constant535_r0", "d0_arg47_1", "d0_arg6_1", "d0_arg48_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant537_r0", "_tensor_constant539_r0", "_tensor_constant540_r0", "_param_constant150_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant151_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_tensor_constant547_r0", "_param_constant152_r0", "_tensor_constant548_r0", "_tensor_constant550_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "21": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant551_r0", "_param_constant155_r0", "_tensor_constant552_r0", "_tensor_constant554_r0", "_param_constant156_r0", "_tensor_constant555_r0", "_tensor_constant557_r0", "_param_constant157_r0", "_tensor_constant558_r0", "_tensor_constant559_r0", "_tensor_constant560_r0", "d0_arg68_1", "_tensor_constant561_r0", "_tensor_constant562_r0", "_tensor_constant563_r0", "_tensor_constant564_r0", "d0_arg49_1", "d0_arg6_1", "d0_arg50_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant566_r0", "_tensor_constant568_r0", "_tensor_constant569_r0", "_param_constant158_r0", "_tensor_constant570_r0", "_tensor_constant572_r0", "_param_constant159_r0", "_tensor_constant573_r0", "_tensor_constant575_r0", "_tensor_constant576_r0", "_param_constant160_r0", "_tensor_constant577_r0", "_tensor_constant579_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "22": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant580_r0", "_param_constant163_r0", "_tensor_constant581_r0", "_tensor_constant583_r0", "_param_constant164_r0", "_tensor_constant584_r0", "_tensor_constant586_r0", "_param_constant165_r0", "_tensor_constant587_r0", "_tensor_constant588_r0", "_tensor_constant589_r0", "d0_arg68_1", "_tensor_constant590_r0", "_tensor_constant591_r0", "_tensor_constant592_r0", "_tensor_constant593_r0", "d0_arg51_1", "d0_arg6_1", "d0_arg52_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant595_r0", "_tensor_constant597_r0", "_tensor_constant598_r0", "_param_constant166_r0", "_tensor_constant599_r0", "_tensor_constant601_r0", "_param_constant167_r0", "_tensor_constant602_r0", "_tensor_constant604_r0", "_tensor_constant605_r0", "_param_constant168_r0", "_tensor_constant606_r0", "_tensor_constant608_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "23": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant609_r0", "_param_constant171_r0", "_tensor_constant610_r0", "_tensor_constant612_r0", "_param_constant172_r0", "_tensor_constant613_r0", "_tensor_constant615_r0", "_param_constant173_r0", "_tensor_constant616_r0", "_tensor_constant617_r0", "_tensor_constant618_r0", "d0_arg68_1", "_tensor_constant619_r0", "_tensor_constant620_r0", "_tensor_constant621_r0", "_tensor_constant622_r0", "d0_arg53_1", "d0_arg6_1", "d0_arg54_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant624_r0", "_tensor_constant626_r0", "_tensor_constant627_r0", "_param_constant174_r0", "_tensor_constant628_r0", "_tensor_constant630_r0", "_param_constant175_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_tensor_constant634_r0", "_param_constant176_r0", "_tensor_constant635_r0", "_tensor_constant637_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "24": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant638_r0", "_param_constant179_r0", "_tensor_constant639_r0", "_tensor_constant641_r0", "_param_constant180_r0", "_tensor_constant642_r0", "_tensor_constant644_r0", "_param_constant181_r0", "_tensor_constant645_r0", "_tensor_constant646_r0", "_tensor_constant647_r0", "d0_arg68_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_tensor_constant650_r0", "_tensor_constant651_r0", "d0_arg55_1", "d0_arg6_1", "d0_arg56_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant182_r0", "_tensor_constant657_r0", "_tensor_constant659_r0", "_param_constant183_r0", "_tensor_constant660_r0", "_tensor_constant662_r0", "_tensor_constant663_r0", "_param_constant184_r0", "_tensor_constant664_r0", "_tensor_constant666_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "25": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant667_r0", "_param_constant187_r0", "_tensor_constant668_r0", "_tensor_constant670_r0", "_param_constant188_r0", "_tensor_constant671_r0", "_tensor_constant673_r0", "_param_constant189_r0", "_tensor_constant674_r0", "_tensor_constant675_r0", "_tensor_constant676_r0", "d0_arg68_1", "_tensor_constant677_r0", "_tensor_constant678_r0", "_tensor_constant679_r0", "_tensor_constant680_r0", "d0_arg57_1", "d0_arg6_1", "d0_arg58_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant682_r0", "_tensor_constant684_r0", "_tensor_constant685_r0", "_param_constant190_r0", "_tensor_constant686_r0", "_tensor_constant688_r0", "_param_constant191_r0", "_tensor_constant689_r0", "_tensor_constant691_r0", "_tensor_constant692_r0", "_param_constant192_r0", "_tensor_constant693_r0", "_tensor_constant695_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "26": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant696_r0", "_param_constant195_r0", "_tensor_constant697_r0", "_tensor_constant699_r0", "_param_constant196_r0", "_tensor_constant700_r0", "_tensor_constant702_r0", "_param_constant197_r0", "_tensor_constant703_r0", "_tensor_constant704_r0", "_tensor_constant705_r0", "d0_arg68_1", "_tensor_constant706_r0", "_tensor_constant707_r0", "_tensor_constant708_r0", "_tensor_constant709_r0", "d0_arg59_1", "d0_arg6_1", "d0_arg60_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant711_r0", "_tensor_constant713_r0", "_tensor_constant714_r0", "_param_constant198_r0", "_tensor_constant715_r0", "_tensor_constant717_r0", "_param_constant199_r0", "_tensor_constant718_r0", "_tensor_constant720_r0", "_tensor_constant721_r0", "_param_constant200_r0", "_tensor_constant722_r0", "_tensor_constant724_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "27": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant725_r0", "_param_constant203_r0", "_tensor_constant726_r0", "_tensor_constant728_r0", "_param_constant204_r0", "_tensor_constant729_r0", "_tensor_constant731_r0", "_param_constant205_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "_tensor_constant734_r0", "d0_arg68_1", "_tensor_constant735_r0", "_tensor_constant736_r0", "_tensor_constant737_r0", "_tensor_constant738_r0", "d0_arg61_1", "d0_arg6_1", "d0_arg62_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant740_r0", "_tensor_constant742_r0", "_tensor_constant743_r0", "_param_constant206_r0", "_tensor_constant744_r0", "_tensor_constant746_r0", "_param_constant207_r0", "_tensor_constant747_r0", "_tensor_constant749_r0", "_tensor_constant750_r0", "_param_constant208_r0", "_tensor_constant751_r0", "_tensor_constant753_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "28": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant754_r0", "_param_constant211_r0", "_tensor_constant755_r0", "_tensor_constant757_r0", "_param_constant212_r0", "_tensor_constant758_r0", "_tensor_constant760_r0", "_param_constant213_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg68_1", "_tensor_constant764_r0", "_tensor_constant765_r0", "_tensor_constant766_r0", "_tensor_constant767_r0", "d0_arg63_1", "d0_arg6_1", "d0_arg64_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant769_r0", "_tensor_constant771_r0", "_tensor_constant772_r0", "_param_constant214_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_param_constant215_r0", "_tensor_constant776_r0", "_tensor_constant778_r0", "_tensor_constant779_r0", "_param_constant216_r0", "_tensor_constant780_r0", "_tensor_constant782_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "ff1369bcc7b47669dfd8a1baad2f11d5" }, "29": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant783_r0", "_param_constant219_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant220_r0", "_tensor_constant787_r0", "_tensor_constant789_r0", "_param_constant221_r0", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "d0_arg68_1", "d0_arg65_1", "d0_arg6_1", "d0_arg66_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant801_r0", "_param_constant222_r0", "_tensor_constant802_r0", "_tensor_constant804_r0", "_param_constant223_r0", "_tensor_constant805_r0", "_tensor_constant807_r0", "_tensor_constant808_r0", "_param_constant224_r0", "_tensor_constant809_r0", "_tensor_constant811_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant812_r0", "_param_constant227_r0", "_tensor_constant813_r0", "_tensor_constant815_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "90098b75e19f46042ea7d3d8b4674c0b" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 64, 1 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 64, 1 ], "dtype": "i32", "idx": 1 }, "past_valid_key_prompt_indices": { "shape": [ 30720 ], "dtype": "i32", "idx": 2 }, "past_valid_key_decode_indices": { "shape": [ 8128 ], "dtype": "i32", "idx": 3 }, "past_valid_value_prompt_indices": { "shape": [ 30720 ], "dtype": "i32", "idx": 4 }, "past_valid_value_decode_indices": { "shape": [ 8128 ], "dtype": "i32", "idx": 5 }, "input_ids": { "shape": [ 64, 1 ], "dtype": "i32", "idx": 6 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 60 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 61 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 62 }, "attention_mask": { "shape": [ 64, 2048 ], "dtype": "bool", "idx": 63 }, "position_ids": { "shape": [ 64, 1 ], "dtype": "i32", "idx": 64 } }, "outputs": { "logits": { "shape": [ 64, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 64 ], [ 0, 1 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 64 ], [ 0, 1 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 30720 ] ], "origin": "past_valid_key_prompt_indices", "dtype": "i32", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 8128 ] ], "origin": "past_valid_key_decode_indices", "dtype": "i32", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 30720 ] ], "origin": "past_valid_value_prompt_indices", "dtype": "i32", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 8128 ] ], "origin": "past_valid_value_decode_indices", "dtype": "i32", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 64 ], [ 0, 1 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg61_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg62_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg63_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg64_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg65_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg66_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg67_1": { "placements": [ [ 0, 64 ], [ 0, 2048 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg68_1": { "placements": [ [ 0, 64 ], [ 0, 1 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 64 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "1ed55b4780a08696d73ce7879b62802d": null, "ff1369bcc7b47669dfd8a1baad2f11d5": null, "90098b75e19f46042ea7d3d8b4674c0b": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv2047-b64-attn2048-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv2047-b128-attn2048", "devices": { "0": "npu:0:0-3" }, "tensors": { "_param_constant0_r0": { "shape": [ 50401, 4096 ], "dtype": "bf16", "value": { "param_file": "0", "name": "decode_model.transformer.wte.org_target.weight", "name_in_graph": "_param_constant0_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_param_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.weight", "name_in_graph": "_param_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant2_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.ln_1.org_target.bias", "name_in_graph": "_param_constant2_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant0_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant0_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant3_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant3_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant1_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant1_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant3_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant3_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant4_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant4_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant4_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant4_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant6_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant6_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant5_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant5_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant7_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant7_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant8_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant8_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant9_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.embed_positions", "name_in_graph": "_tensor_constant9_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant18_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant18_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant6_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant6_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant19_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant19_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant21_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant21_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant7_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant7_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant22_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant22_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant24_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant24_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant25_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_5.QLV4_output_scale", "name_in_graph": "_tensor_constant25_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant8_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.0.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant8_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.0.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant28_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.0.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant28_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant9_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.weight", "name_in_graph": "_param_constant9_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant10_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.ln_1.org_target.bias", "name_in_graph": "_param_constant10_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant29_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant29_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant11_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant11_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant30_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant30_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant32_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant32_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant12_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant12_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant35_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant35_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant13_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant13_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant36_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant36_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant37_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant37_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant38_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.embed_positions", "name_in_graph": "_tensor_constant38_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant39_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_4_input_0_1_scale", "name_in_graph": "_tensor_constant39_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant40_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_4_input_1_1_scale", "name_in_graph": "_tensor_constant40_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant41_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_5_input_0_1_scale", "name_in_graph": "_tensor_constant41_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant42_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_5_input_1_1_scale", "name_in_graph": "_tensor_constant42_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant44_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_2.QLV4_output_scale", "name_in_graph": "_tensor_constant44_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant46_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_3_input_1_1_scale", "name_in_graph": "_tensor_constant46_r0", "placements": [] } }, "_tensor_constant47_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant47_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant14_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant14_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant48_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant48_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant15_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant15_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant51_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant51_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant53_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant53_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant54_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant54_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant16_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.1.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant16_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant55_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.1.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant55_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.1.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant17_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.weight", "name_in_graph": "_param_constant17_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant18_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.ln_1.org_target.bias", "name_in_graph": "_param_constant18_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant58_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant58_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant19_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant19_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant59_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant59_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant61_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant61_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant20_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant20_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant62_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant62_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant64_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant64_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant21_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant21_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant67_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.embed_positions", "name_in_graph": "_tensor_constant67_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant68_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_8_input_0_1_scale", "name_in_graph": "_tensor_constant68_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant69_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_8_input_1_1_scale", "name_in_graph": "_tensor_constant69_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant70_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_9_input_0_1_scale", "name_in_graph": "_tensor_constant70_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant71_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_9_input_1_1_scale", "name_in_graph": "_tensor_constant71_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant73_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_4.QLV4_output_scale", "name_in_graph": "_tensor_constant73_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant75_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_5_input_1_1_scale", "name_in_graph": "_tensor_constant75_r0", "placements": [] } }, "_tensor_constant76_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant76_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant22_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant22_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant77_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant77_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant79_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant79_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant23_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant23_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant80_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant80_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant82_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant82_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant83_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_15.QLV4_output_scale", "name_in_graph": "_tensor_constant83_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant24_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.2.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant24_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant84_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.2.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant84_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant86_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.2.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant86_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant25_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.weight", "name_in_graph": "_param_constant25_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant26_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.ln_1.org_target.bias", "name_in_graph": "_param_constant26_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant87_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant87_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant27_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant27_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant88_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant88_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant28_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant28_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant91_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant91_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant93_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant93_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant29_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant29_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant94_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant94_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant95_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant95_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant96_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.embed_positions", "name_in_graph": "_tensor_constant96_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant97_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_12_input_0_1_scale", "name_in_graph": "_tensor_constant97_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant98_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_12_input_1_1_scale", "name_in_graph": "_tensor_constant98_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant99_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_13_input_0_1_scale", "name_in_graph": "_tensor_constant99_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant100_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_13_input_1_1_scale", "name_in_graph": "_tensor_constant100_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant102_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_6.QLV4_output_scale", "name_in_graph": "_tensor_constant102_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant104_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_7_input_1_1_scale", "name_in_graph": "_tensor_constant104_r0", "placements": [] } }, "_tensor_constant105_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant105_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant30_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant30_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant108_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant108_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant31_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant31_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant109_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant109_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant111_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant111_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant112_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant112_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant32_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.3.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant32_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.3.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant115_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.3.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant115_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant33_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.weight", "name_in_graph": "_param_constant33_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant34_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.ln_1.org_target.bias", "name_in_graph": "_param_constant34_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant116_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant116_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant35_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant35_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant117_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant117_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant119_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant119_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant36_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant36_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant120_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant120_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant37_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant37_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant123_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant123_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant124_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant124_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant125_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.embed_positions", "name_in_graph": "_tensor_constant125_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant126_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_16_input_0_1_scale", "name_in_graph": "_tensor_constant126_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant127_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_16_input_1_1_scale", "name_in_graph": "_tensor_constant127_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant128_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_17_input_0_1_scale", "name_in_graph": "_tensor_constant128_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant129_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_17_input_1_1_scale", "name_in_graph": "_tensor_constant129_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant131_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_8.QLV4_output_scale", "name_in_graph": "_tensor_constant131_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant133_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_9_input_1_1_scale", "name_in_graph": "_tensor_constant133_r0", "placements": [] } }, "_tensor_constant134_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant134_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant38_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant38_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant135_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant135_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant39_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant39_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant138_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant138_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant140_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant140_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant141_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_25.QLV4_output_scale", "name_in_graph": "_tensor_constant141_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant40_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.4.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant40_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant142_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.4.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant142_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant144_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.4.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant144_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant41_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.weight", "name_in_graph": "_param_constant41_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant42_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.ln_1.org_target.bias", "name_in_graph": "_param_constant42_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant145_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant145_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant43_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant43_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant148_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant148_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant44_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant44_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant149_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant149_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant151_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant151_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant45_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant45_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant152_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant152_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant154_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.embed_positions", "name_in_graph": "_tensor_constant154_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant155_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_20_input_0_1_scale", "name_in_graph": "_tensor_constant155_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant156_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_20_input_1_1_scale", "name_in_graph": "_tensor_constant156_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant157_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_21_input_0_1_scale", "name_in_graph": "_tensor_constant157_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant158_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_21_input_1_1_scale", "name_in_graph": "_tensor_constant158_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant160_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_10.QLV4_output_scale", "name_in_graph": "_tensor_constant160_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant162_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_11_input_1_1_scale", "name_in_graph": "_tensor_constant162_r0", "placements": [] } }, "_tensor_constant163_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant163_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant46_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant46_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant164_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant164_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant166_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant166_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant47_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant47_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant167_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant167_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant169_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant169_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant170_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant170_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant48_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.5.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant48_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant171_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.5.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant171_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant173_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.5.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant173_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant49_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.weight", "name_in_graph": "_param_constant49_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant50_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.ln_1.org_target.bias", "name_in_graph": "_param_constant50_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant174_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant174_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant51_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant51_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant175_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant175_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant52_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant52_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant180_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant180_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant53_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant53_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant181_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant181_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant182_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant182_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant183_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.embed_positions", "name_in_graph": "_tensor_constant183_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant184_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_24_input_0_1_scale", "name_in_graph": "_tensor_constant184_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant185_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_24_input_1_1_scale", "name_in_graph": "_tensor_constant185_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant186_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_25_input_0_1_scale", "name_in_graph": "_tensor_constant186_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant187_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_25_input_1_1_scale", "name_in_graph": "_tensor_constant187_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant189_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_12.QLV4_output_scale", "name_in_graph": "_tensor_constant189_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant191_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_13_input_1_1_scale", "name_in_graph": "_tensor_constant191_r0", "placements": [] } }, "_tensor_constant192_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant192_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant54_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant54_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant195_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant195_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant55_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant55_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant196_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant196_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant198_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant198_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant199_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_35.QLV4_output_scale", "name_in_graph": "_tensor_constant199_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant56_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.6.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant56_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant200_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.6.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant200_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.6.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant57_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.weight", "name_in_graph": "_param_constant57_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant58_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.ln_1.org_target.bias", "name_in_graph": "_param_constant58_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant203_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant203_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant59_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant59_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant204_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant204_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant206_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant206_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant60_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant60_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant207_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant207_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant61_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant61_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant211_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant211_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant212_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.embed_positions", "name_in_graph": "_tensor_constant212_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant213_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_28_input_0_1_scale", "name_in_graph": "_tensor_constant213_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant214_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_28_input_1_1_scale", "name_in_graph": "_tensor_constant214_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant215_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_29_input_0_1_scale", "name_in_graph": "_tensor_constant215_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant216_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_29_input_1_1_scale", "name_in_graph": "_tensor_constant216_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant218_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_14.QLV4_output_scale", "name_in_graph": "_tensor_constant218_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant220_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_15_input_1_1_scale", "name_in_graph": "_tensor_constant220_r0", "placements": [] } }, "_tensor_constant221_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant221_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant62_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant62_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant222_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant222_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant224_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant224_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant63_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant63_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant225_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant225_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant227_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant227_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant228_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant228_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant64_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.7.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant64_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant229_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.7.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant229_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant231_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.7.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant231_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant65_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.weight", "name_in_graph": "_param_constant65_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant66_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.ln_1.org_target.bias", "name_in_graph": "_param_constant66_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant232_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant232_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant67_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant67_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant233_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant233_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant235_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant235_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant68_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant68_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant236_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant236_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant238_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant238_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant69_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant69_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant239_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant239_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant240_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant240_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant241_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.embed_positions", "name_in_graph": "_tensor_constant241_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant242_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_32_input_0_1_scale", "name_in_graph": "_tensor_constant242_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant243_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_32_input_1_1_scale", "name_in_graph": "_tensor_constant243_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant244_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_33_input_0_1_scale", "name_in_graph": "_tensor_constant244_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant245_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_33_input_1_1_scale", "name_in_graph": "_tensor_constant245_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant247_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_16.QLV4_output_scale", "name_in_graph": "_tensor_constant247_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant249_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_17_input_1_1_scale", "name_in_graph": "_tensor_constant249_r0", "placements": [] } }, "_tensor_constant250_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant250_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant70_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant70_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant251_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant251_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant253_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant253_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant71_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant71_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant254_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant254_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant256_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant256_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant257_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_45.QLV4_output_scale", "name_in_graph": "_tensor_constant257_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant72_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.8.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant72_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant258_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.8.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant258_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant260_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.8.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant260_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant73_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.weight", "name_in_graph": "_param_constant73_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant74_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.ln_1.org_target.bias", "name_in_graph": "_param_constant74_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant261_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant261_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant75_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant75_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant262_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant262_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant264_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant264_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant76_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant76_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant265_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant265_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant267_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant267_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant77_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant77_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant268_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant268_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant269_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant269_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant270_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.embed_positions", "name_in_graph": "_tensor_constant270_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant271_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_36_input_0_1_scale", "name_in_graph": "_tensor_constant271_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant272_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_36_input_1_1_scale", "name_in_graph": "_tensor_constant272_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant273_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_37_input_0_1_scale", "name_in_graph": "_tensor_constant273_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant274_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_37_input_1_1_scale", "name_in_graph": "_tensor_constant274_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant276_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_18.QLV4_output_scale", "name_in_graph": "_tensor_constant276_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant278_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_19_input_1_1_scale", "name_in_graph": "_tensor_constant278_r0", "placements": [] } }, "_tensor_constant279_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant279_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant78_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant78_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant280_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant280_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant282_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant282_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant79_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant79_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant283_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant283_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant285_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant285_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant286_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant286_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant80_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.9.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant80_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant287_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.9.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant287_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant289_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.9.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant289_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant81_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.weight", "name_in_graph": "_param_constant81_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant82_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.ln_1.org_target.bias", "name_in_graph": "_param_constant82_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant290_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant290_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant83_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant83_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant291_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant291_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant293_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant293_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant84_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant84_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant294_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant294_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant296_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant296_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant85_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant85_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant297_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant297_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant298_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant298_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant299_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.embed_positions", "name_in_graph": "_tensor_constant299_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant300_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_40_input_0_1_scale", "name_in_graph": "_tensor_constant300_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant301_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_40_input_1_1_scale", "name_in_graph": "_tensor_constant301_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant302_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_41_input_0_1_scale", "name_in_graph": "_tensor_constant302_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant303_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_41_input_1_1_scale", "name_in_graph": "_tensor_constant303_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant305_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_20.QLV4_output_scale", "name_in_graph": "_tensor_constant305_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant307_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_21_input_1_1_scale", "name_in_graph": "_tensor_constant307_r0", "placements": [] } }, "_tensor_constant308_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant308_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant86_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant86_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant309_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant309_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant311_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant311_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant87_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant87_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant312_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant312_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant314_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant314_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant315_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_55.QLV4_output_scale", "name_in_graph": "_tensor_constant315_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant88_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.10.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant88_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant316_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.10.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant316_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant318_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.10.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant318_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant89_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.weight", "name_in_graph": "_param_constant89_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant90_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.ln_1.org_target.bias", "name_in_graph": "_param_constant90_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant319_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant319_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant91_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant91_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant320_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant320_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant322_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant322_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant92_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant92_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant323_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant323_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant325_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant325_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant93_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant93_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant326_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant326_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant327_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant327_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant328_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.embed_positions", "name_in_graph": "_tensor_constant328_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant329_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_44_input_0_1_scale", "name_in_graph": "_tensor_constant329_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant330_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_44_input_1_1_scale", "name_in_graph": "_tensor_constant330_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant331_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_45_input_0_1_scale", "name_in_graph": "_tensor_constant331_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant332_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_45_input_1_1_scale", "name_in_graph": "_tensor_constant332_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant334_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_22.QLV4_output_scale", "name_in_graph": "_tensor_constant334_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant336_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_23_input_1_1_scale", "name_in_graph": "_tensor_constant336_r0", "placements": [] } }, "_tensor_constant337_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant337_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant94_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant94_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant338_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant338_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant340_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant340_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant95_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant95_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant341_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant341_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant343_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant343_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant344_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_60.QLV4_output_scale", "name_in_graph": "_tensor_constant344_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant96_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.11.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant96_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant345_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.11.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant345_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant347_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.11.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant347_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant97_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.weight", "name_in_graph": "_param_constant97_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant98_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.ln_1.org_target.bias", "name_in_graph": "_param_constant98_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant348_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant348_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant99_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant99_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant349_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant349_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant351_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant351_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant100_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant100_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant352_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant352_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant354_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant354_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant101_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant101_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant355_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant355_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant356_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant356_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant357_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.embed_positions", "name_in_graph": "_tensor_constant357_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant358_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_48_input_0_1_scale", "name_in_graph": "_tensor_constant358_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant359_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_48_input_1_1_scale", "name_in_graph": "_tensor_constant359_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant360_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_49_input_0_1_scale", "name_in_graph": "_tensor_constant360_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant361_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_49_input_1_1_scale", "name_in_graph": "_tensor_constant361_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant363_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_24.QLV4_output_scale", "name_in_graph": "_tensor_constant363_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant365_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_25_input_1_1_scale", "name_in_graph": "_tensor_constant365_r0", "placements": [] } }, "_tensor_constant366_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant366_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant102_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant102_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant367_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant367_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant369_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant369_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant103_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant103_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant370_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant370_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant372_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant372_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant373_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_65.QLV4_output_scale", "name_in_graph": "_tensor_constant373_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant104_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.12.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant104_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant374_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.12.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant374_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant376_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.12.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant376_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant105_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.weight", "name_in_graph": "_param_constant105_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant106_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.ln_1.org_target.bias", "name_in_graph": "_param_constant106_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant377_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant377_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant107_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant107_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant378_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant378_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant380_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant380_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant108_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant108_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant381_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant381_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant383_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant383_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant109_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant109_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant384_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant384_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant385_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant385_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant386_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.embed_positions", "name_in_graph": "_tensor_constant386_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant387_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_52_input_0_1_scale", "name_in_graph": "_tensor_constant387_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant388_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_52_input_1_1_scale", "name_in_graph": "_tensor_constant388_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant389_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_53_input_0_1_scale", "name_in_graph": "_tensor_constant389_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant390_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_53_input_1_1_scale", "name_in_graph": "_tensor_constant390_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant392_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_26.QLV4_output_scale", "name_in_graph": "_tensor_constant392_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant394_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_27_input_1_1_scale", "name_in_graph": "_tensor_constant394_r0", "placements": [] } }, "_tensor_constant395_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant395_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant110_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant110_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant396_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant396_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant398_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant398_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant111_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant111_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant399_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant399_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant401_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant401_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant402_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_70.QLV4_output_scale", "name_in_graph": "_tensor_constant402_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant112_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.13.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant112_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant403_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.13.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant403_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant405_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.13.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant405_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant113_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.weight", "name_in_graph": "_param_constant113_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant114_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.ln_1.org_target.bias", "name_in_graph": "_param_constant114_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant406_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant406_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant115_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant115_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant407_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant407_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant409_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant409_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant116_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant116_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant410_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant410_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant412_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant412_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant117_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant117_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant413_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant413_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant414_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant414_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant415_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.embed_positions", "name_in_graph": "_tensor_constant415_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant416_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_56_input_0_1_scale", "name_in_graph": "_tensor_constant416_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant417_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_56_input_1_1_scale", "name_in_graph": "_tensor_constant417_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant418_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_57_input_0_1_scale", "name_in_graph": "_tensor_constant418_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant419_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_57_input_1_1_scale", "name_in_graph": "_tensor_constant419_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant421_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_28.QLV4_output_scale", "name_in_graph": "_tensor_constant421_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant423_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_29_input_1_1_scale", "name_in_graph": "_tensor_constant423_r0", "placements": [] } }, "_tensor_constant424_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant424_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant118_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant118_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant425_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant425_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant427_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant427_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant119_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant119_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant428_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant428_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant430_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant430_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant431_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_75.QLV4_output_scale", "name_in_graph": "_tensor_constant431_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant120_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.14.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant120_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant432_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.14.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant432_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant434_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.14.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant434_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant121_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.weight", "name_in_graph": "_param_constant121_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant122_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.ln_1.org_target.bias", "name_in_graph": "_param_constant122_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant435_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant435_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant123_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant123_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant436_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant436_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant438_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant438_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant124_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant124_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant439_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant439_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant441_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant441_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant125_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant125_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant442_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant442_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant443_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant443_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant444_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.embed_positions", "name_in_graph": "_tensor_constant444_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant445_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_60_input_0_1_scale", "name_in_graph": "_tensor_constant445_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant446_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_60_input_1_1_scale", "name_in_graph": "_tensor_constant446_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant447_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_61_input_0_1_scale", "name_in_graph": "_tensor_constant447_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant448_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_61_input_1_1_scale", "name_in_graph": "_tensor_constant448_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant450_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_30.QLV4_output_scale", "name_in_graph": "_tensor_constant450_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant452_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_31_input_1_1_scale", "name_in_graph": "_tensor_constant452_r0", "placements": [] } }, "_tensor_constant453_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant453_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant126_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant126_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant454_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant454_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant456_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant456_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant127_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant127_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant457_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant457_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant459_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant459_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant460_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_80.QLV4_output_scale", "name_in_graph": "_tensor_constant460_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant128_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.15.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant128_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant461_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.15.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant461_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant463_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.15.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant463_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant129_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.weight", "name_in_graph": "_param_constant129_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant130_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.ln_1.org_target.bias", "name_in_graph": "_param_constant130_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant464_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant464_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant131_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant131_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant465_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant465_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant467_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant467_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant132_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant132_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant468_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant468_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant470_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant470_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant133_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant133_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant471_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant471_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant472_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant472_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant473_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.embed_positions", "name_in_graph": "_tensor_constant473_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant474_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_64_input_0_1_scale", "name_in_graph": "_tensor_constant474_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant475_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_64_input_1_1_scale", "name_in_graph": "_tensor_constant475_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant476_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_65_input_0_1_scale", "name_in_graph": "_tensor_constant476_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant477_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_65_input_1_1_scale", "name_in_graph": "_tensor_constant477_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant479_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_32.QLV4_output_scale", "name_in_graph": "_tensor_constant479_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant481_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_33_input_1_1_scale", "name_in_graph": "_tensor_constant481_r0", "placements": [] } }, "_tensor_constant482_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant482_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant134_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant134_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant483_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant483_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant485_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant485_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant135_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant135_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant486_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant486_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant488_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant488_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant489_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_85.QLV4_output_scale", "name_in_graph": "_tensor_constant489_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant136_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.16.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant136_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant490_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.16.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant490_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant492_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.16.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant492_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant137_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.weight", "name_in_graph": "_param_constant137_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant138_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.ln_1.org_target.bias", "name_in_graph": "_param_constant138_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant493_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant493_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant139_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant139_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant494_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant494_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant496_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant496_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant140_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant140_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant497_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant497_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant499_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant499_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant141_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant141_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant500_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant500_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant501_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant501_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant502_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.embed_positions", "name_in_graph": "_tensor_constant502_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant503_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_68_input_0_1_scale", "name_in_graph": "_tensor_constant503_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant504_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_68_input_1_1_scale", "name_in_graph": "_tensor_constant504_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant505_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_69_input_0_1_scale", "name_in_graph": "_tensor_constant505_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant506_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_69_input_1_1_scale", "name_in_graph": "_tensor_constant506_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant508_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_34.QLV4_output_scale", "name_in_graph": "_tensor_constant508_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant510_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_35_input_1_1_scale", "name_in_graph": "_tensor_constant510_r0", "placements": [] } }, "_tensor_constant511_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant511_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant142_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant142_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant512_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant512_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant514_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant514_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant143_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant143_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant515_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant515_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant517_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant517_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant518_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_90.QLV4_output_scale", "name_in_graph": "_tensor_constant518_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant144_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.17.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant144_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant519_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.17.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant519_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant521_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.17.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant521_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant145_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.weight", "name_in_graph": "_param_constant145_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant146_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.ln_1.org_target.bias", "name_in_graph": "_param_constant146_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant522_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant522_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant147_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant147_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant523_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant523_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant525_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant525_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant148_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant148_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant526_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant526_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant528_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant528_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant149_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant149_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant529_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant529_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant530_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant530_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant531_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.embed_positions", "name_in_graph": "_tensor_constant531_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant532_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_72_input_0_1_scale", "name_in_graph": "_tensor_constant532_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant533_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_72_input_1_1_scale", "name_in_graph": "_tensor_constant533_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant534_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_73_input_0_1_scale", "name_in_graph": "_tensor_constant534_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant535_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_73_input_1_1_scale", "name_in_graph": "_tensor_constant535_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant537_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_36.QLV4_output_scale", "name_in_graph": "_tensor_constant537_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant539_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_37_input_1_1_scale", "name_in_graph": "_tensor_constant539_r0", "placements": [] } }, "_tensor_constant540_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant540_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant150_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant150_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant541_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant541_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant543_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant543_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant151_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant151_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant544_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant544_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant546_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant546_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant547_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_95.QLV4_output_scale", "name_in_graph": "_tensor_constant547_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant152_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.18.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant152_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant548_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.18.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant548_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant550_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.18.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant550_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant153_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.weight", "name_in_graph": "_param_constant153_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant154_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.ln_1.org_target.bias", "name_in_graph": "_param_constant154_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant551_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant551_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant155_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant155_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant552_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant552_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant554_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant554_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant156_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant156_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant555_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant555_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant557_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant557_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant157_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant157_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant558_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant558_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant559_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant559_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant560_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.embed_positions", "name_in_graph": "_tensor_constant560_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant561_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_76_input_0_1_scale", "name_in_graph": "_tensor_constant561_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant562_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_76_input_1_1_scale", "name_in_graph": "_tensor_constant562_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant563_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_77_input_0_1_scale", "name_in_graph": "_tensor_constant563_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant564_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_77_input_1_1_scale", "name_in_graph": "_tensor_constant564_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant566_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_38.QLV4_output_scale", "name_in_graph": "_tensor_constant566_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant568_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_39_input_1_1_scale", "name_in_graph": "_tensor_constant568_r0", "placements": [] } }, "_tensor_constant569_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant569_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant158_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant158_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant570_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant570_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant572_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant572_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant159_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant159_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant573_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant573_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant575_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant575_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant576_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_100.QLV4_output_scale", "name_in_graph": "_tensor_constant576_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant160_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.19.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant160_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant577_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.19.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant577_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant579_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.19.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant579_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant161_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.weight", "name_in_graph": "_param_constant161_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant162_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.ln_1.org_target.bias", "name_in_graph": "_param_constant162_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant580_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant580_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant163_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant163_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant581_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant581_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant583_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant583_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant164_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant164_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant584_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant584_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant586_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant586_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant165_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant165_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant587_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant587_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant588_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant588_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant589_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.embed_positions", "name_in_graph": "_tensor_constant589_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant590_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_80_input_0_1_scale", "name_in_graph": "_tensor_constant590_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant591_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_80_input_1_1_scale", "name_in_graph": "_tensor_constant591_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant592_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_81_input_0_1_scale", "name_in_graph": "_tensor_constant592_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant593_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_81_input_1_1_scale", "name_in_graph": "_tensor_constant593_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant595_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_40.QLV4_output_scale", "name_in_graph": "_tensor_constant595_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant597_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_41_input_1_1_scale", "name_in_graph": "_tensor_constant597_r0", "placements": [] } }, "_tensor_constant598_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant598_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant166_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant166_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant599_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant599_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant601_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant601_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant167_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant167_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant602_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant602_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant604_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant604_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant605_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_105.QLV4_output_scale", "name_in_graph": "_tensor_constant605_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant168_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.20.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant168_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant606_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.20.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant606_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant608_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.20.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant608_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant169_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.weight", "name_in_graph": "_param_constant169_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant170_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.ln_1.org_target.bias", "name_in_graph": "_param_constant170_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant609_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant609_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant171_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant171_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant610_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant610_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant612_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant612_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant172_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant172_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant613_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant613_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant615_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant615_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant173_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant173_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant616_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant616_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant617_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant617_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant618_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.embed_positions", "name_in_graph": "_tensor_constant618_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant619_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_84_input_0_1_scale", "name_in_graph": "_tensor_constant619_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant620_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_84_input_1_1_scale", "name_in_graph": "_tensor_constant620_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant621_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_85_input_0_1_scale", "name_in_graph": "_tensor_constant621_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant622_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_85_input_1_1_scale", "name_in_graph": "_tensor_constant622_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant624_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_42.QLV4_output_scale", "name_in_graph": "_tensor_constant624_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant626_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_43_input_1_1_scale", "name_in_graph": "_tensor_constant626_r0", "placements": [] } }, "_tensor_constant627_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant627_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant174_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant174_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant628_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant628_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant630_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant630_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant175_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant175_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant631_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant631_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant633_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant633_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant634_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_110.QLV4_output_scale", "name_in_graph": "_tensor_constant634_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant176_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.21.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant176_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant635_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.21.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant635_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant637_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.21.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant637_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant177_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.weight", "name_in_graph": "_param_constant177_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant178_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.ln_1.org_target.bias", "name_in_graph": "_param_constant178_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant638_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant638_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant179_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant179_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant639_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant639_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant641_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant641_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant180_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant180_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant642_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant642_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant644_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant644_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant181_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant181_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant645_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant645_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant646_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant646_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant647_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.embed_positions", "name_in_graph": "_tensor_constant647_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant648_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_88_input_0_1_scale", "name_in_graph": "_tensor_constant648_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant649_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_88_input_1_1_scale", "name_in_graph": "_tensor_constant649_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant650_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_89_input_0_1_scale", "name_in_graph": "_tensor_constant650_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant651_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_89_input_1_1_scale", "name_in_graph": "_tensor_constant651_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant653_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_44.QLV4_output_scale", "name_in_graph": "_tensor_constant653_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant655_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_45_input_1_1_scale", "name_in_graph": "_tensor_constant655_r0", "placements": [] } }, "_tensor_constant656_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant656_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant182_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant182_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant657_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant657_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant659_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant659_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant183_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant183_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant660_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant660_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant662_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant662_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant663_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_115.QLV4_output_scale", "name_in_graph": "_tensor_constant663_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant184_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.22.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant184_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant664_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.22.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant664_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant666_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.22.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant666_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant185_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.weight", "name_in_graph": "_param_constant185_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant186_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.ln_1.org_target.bias", "name_in_graph": "_param_constant186_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant667_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant667_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant187_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant187_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant668_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant668_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant670_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant670_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant188_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant188_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant671_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant671_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant673_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant673_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant189_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant189_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant674_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant674_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant675_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant675_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant676_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.embed_positions", "name_in_graph": "_tensor_constant676_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant677_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_92_input_0_1_scale", "name_in_graph": "_tensor_constant677_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant678_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_92_input_1_1_scale", "name_in_graph": "_tensor_constant678_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant679_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_93_input_0_1_scale", "name_in_graph": "_tensor_constant679_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant680_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_93_input_1_1_scale", "name_in_graph": "_tensor_constant680_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant682_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_46.QLV4_output_scale", "name_in_graph": "_tensor_constant682_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant684_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_47_input_1_1_scale", "name_in_graph": "_tensor_constant684_r0", "placements": [] } }, "_tensor_constant685_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant685_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant190_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant190_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant686_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant686_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant688_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant688_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant191_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant191_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant689_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant689_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant691_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant691_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant692_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_120.QLV4_output_scale", "name_in_graph": "_tensor_constant692_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant192_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.23.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant192_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant693_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.23.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant693_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant695_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.23.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant695_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant193_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.weight", "name_in_graph": "_param_constant193_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant194_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.ln_1.org_target.bias", "name_in_graph": "_param_constant194_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant696_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant696_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant195_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant195_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant697_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant697_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant699_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant699_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant196_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant196_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant700_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant700_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant702_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant702_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant197_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant197_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant703_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant703_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant704_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant704_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant705_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.embed_positions", "name_in_graph": "_tensor_constant705_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant706_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_96_input_0_1_scale", "name_in_graph": "_tensor_constant706_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant707_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_96_input_1_1_scale", "name_in_graph": "_tensor_constant707_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant708_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_97_input_0_1_scale", "name_in_graph": "_tensor_constant708_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant709_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_97_input_1_1_scale", "name_in_graph": "_tensor_constant709_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant711_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_48.QLV4_output_scale", "name_in_graph": "_tensor_constant711_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant713_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_49_input_1_1_scale", "name_in_graph": "_tensor_constant713_r0", "placements": [] } }, "_tensor_constant714_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant714_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant198_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant198_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant715_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant715_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant717_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant717_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant199_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant199_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant718_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant718_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant720_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant720_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant721_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_125.QLV4_output_scale", "name_in_graph": "_tensor_constant721_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant200_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.24.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant200_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant722_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.24.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant722_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant724_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.24.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant724_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant201_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.weight", "name_in_graph": "_param_constant201_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant202_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.ln_1.org_target.bias", "name_in_graph": "_param_constant202_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant725_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant725_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant203_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant203_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant726_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant726_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant728_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant728_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant204_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant204_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant729_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant729_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant731_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant731_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant205_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant205_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant732_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant732_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant733_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant733_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant734_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.embed_positions", "name_in_graph": "_tensor_constant734_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant735_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_100_input_0_1_scale", "name_in_graph": "_tensor_constant735_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant736_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_100_input_1_1_scale", "name_in_graph": "_tensor_constant736_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant737_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_101_input_0_1_scale", "name_in_graph": "_tensor_constant737_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant738_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_101_input_1_1_scale", "name_in_graph": "_tensor_constant738_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant740_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_50.QLV4_output_scale", "name_in_graph": "_tensor_constant740_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant742_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_51_input_1_1_scale", "name_in_graph": "_tensor_constant742_r0", "placements": [] } }, "_tensor_constant743_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant743_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant206_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant206_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant744_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant744_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant746_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant746_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant207_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant207_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant747_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant747_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant749_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant749_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant750_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_130.QLV4_output_scale", "name_in_graph": "_tensor_constant750_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant208_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.25.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant208_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant751_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.25.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant751_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant753_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.25.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant753_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant209_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.weight", "name_in_graph": "_param_constant209_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant210_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.ln_1.org_target.bias", "name_in_graph": "_param_constant210_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant754_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant754_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant211_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant211_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant755_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant755_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant757_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant757_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant212_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant212_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant758_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant758_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant760_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant760_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant213_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant213_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant761_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant761_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant762_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant762_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant763_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.embed_positions", "name_in_graph": "_tensor_constant763_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant764_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_104_input_0_1_scale", "name_in_graph": "_tensor_constant764_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant765_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_104_input_1_1_scale", "name_in_graph": "_tensor_constant765_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant766_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_105_input_0_1_scale", "name_in_graph": "_tensor_constant766_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant767_r0": { "shape": [ 1, 1, 16, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.cat_105_input_1_1_scale", "name_in_graph": "_tensor_constant767_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16 ], [ 0, 1 ] ] } }, "_tensor_constant769_r0": { "shape": [ 1, 16, 1, 1 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_52.QLV4_output_scale", "name_in_graph": "_tensor_constant769_r0", "placements": [ [ 0, 1 ], [ 0, 16 ], [ 0, 1 ], [ 0, 1 ] ] } }, "_tensor_constant771_r0": { "shape": [], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.matmul_53_input_1_1_scale", "name_in_graph": "_tensor_constant771_r0", "placements": [] } }, "_tensor_constant772_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant772_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant214_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant214_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant773_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant773_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant775_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant775_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant215_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant215_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant776_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant776_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant778_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant778_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant779_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_135.QLV4_output_scale", "name_in_graph": "_tensor_constant779_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant216_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.26.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant216_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant780_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.26.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant780_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant782_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.26.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant782_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant217_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.weight", "name_in_graph": "_param_constant217_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant218_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.ln_1.org_target.bias", "name_in_graph": "_param_constant218_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant783_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.ln_1.QLV4_output_scale", "name_in_graph": "_tensor_constant783_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant219_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.q_proj.org_target.weight", "name_in_graph": "_param_constant219_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant784_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.q_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant784_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant786_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.q_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant786_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant220_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.k_proj.org_target.weight", "name_in_graph": "_param_constant220_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant787_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.k_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant787_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant789_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.k_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant789_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant221_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.v_proj.org_target.weight", "name_in_graph": "_param_constant221_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant790_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.v_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant790_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant791_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.v_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant791_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant792_r0": { "shape": [ 2048, 128 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.embed_positions", "name_in_graph": "_tensor_constant792_r0", "placements": [ [ 0, 2048 ], [ 0, 128 ] ] } }, "_tensor_constant801_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj_input_0_0_scale", "name_in_graph": "_tensor_constant801_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant222_r0": { "shape": [ 4096, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.attn.out_proj.org_target.weight", "name_in_graph": "_param_constant222_r0", "placements": [ [ 0, 4096 ], [ 0, 4096 ] ] } }, "_tensor_constant802_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.attn.out_proj.QLV4_bias.bias", "name_in_graph": "_tensor_constant802_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant804_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.attn.out_proj.QLV4_output_scale", "name_in_graph": "_tensor_constant804_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant223_r0": { "shape": [ 16384, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_in.org_target.weight", "name_in_graph": "_param_constant223_r0", "placements": [ [ 0, 16384 ], [ 0, 4096 ] ] } }, "_tensor_constant805_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_in.QLV4_bias.bias", "name_in_graph": "_tensor_constant805_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant807_r0": { "shape": [ 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_in.QLV4_output_scale", "name_in_graph": "_tensor_constant807_r0", "placements": [ [ 0, 16384 ] ] } }, "_tensor_constant808_r0": { "shape": [ 1, 1, 16384 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.mul_140.QLV4_output_scale", "name_in_graph": "_tensor_constant808_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 16384 ] ] } }, "_param_constant224_r0": { "shape": [ 4096, 16384 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.transformer.h.27.mlp.fc_out.org_target.weight", "name_in_graph": "_param_constant224_r0", "placements": [ [ 0, 4096 ], [ 0, 16384 ] ] } }, "_tensor_constant809_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.transformer.h.27.mlp.fc_out.QLV4_bias.bias", "name_in_graph": "_tensor_constant809_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant811_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.h.27.mlp.fc_out.QLV4_output_scale", "name_in_graph": "_tensor_constant811_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant225_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.weight", "name_in_graph": "_param_constant225_r0", "placements": [ [ 0, 4096 ] ] } }, "_param_constant226_r0": { "shape": [ 4096 ], "dtype": "f32", "value": { "param_file": "0", "name": "decode_model.transformer.ln_f.org_target.bias", "name_in_graph": "_param_constant226_r0", "placements": [ [ 0, 4096 ] ] } }, "_tensor_constant812_r0": { "shape": [ 1, 1, 4096 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.transformer.ln_f.QLV4_output_scale", "name_in_graph": "_tensor_constant812_r0", "placements": [ [ 0, 1 ], [ 0, 1 ], [ 0, 4096 ] ] } }, "_param_constant227_r0": { "shape": [ 50401, 4096 ], "dtype": "i8", "value": { "param_file": "0", "name": "decode_model.lm_head.org_target.weight", "name_in_graph": "_param_constant227_r0", "placements": [ [ 0, 50401 ], [ 0, 4096 ] ] } }, "_tensor_constant813_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "0", "name": "prefill_model.lm_head.QLV4_bias.bias", "name_in_graph": "_tensor_constant813_r0", "placements": [ [ 0, 50401 ] ] } }, "_tensor_constant815_r0": { "shape": [ 50401 ], "dtype": "f32", "value": { "param_file": "1", "name": "decode_model.lm_head.QLV4_output_scale", "name_in_graph": "_tensor_constant815_r0", "placements": [ [ 0, 50401 ] ] } }, "d0_arg0_1": { "shape": [ 128, 1 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 128, 1 ], "dtype": "i32" }, "d0_arg6_1": { "shape": [ 61440 ], "dtype": "i32" }, "d0_arg7_1": { "shape": [ 16256 ], "dtype": "i32" }, "d0_arg8_1": { "shape": [ 61440 ], "dtype": "i32" }, "d0_arg9_1": { "shape": [ 16256 ], "dtype": "i32" }, "d0_arg10_1": { "shape": [ 128, 1 ], "dtype": "i32" }, "d0_arg11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg28_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg29_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg30_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg31_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg32_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg33_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg34_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg35_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg36_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg37_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg38_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg39_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg40_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg41_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg42_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg43_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg44_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg45_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg46_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg47_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg48_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg49_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg50_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg51_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg52_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg53_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg54_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg55_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg56_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg57_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg58_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg59_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg60_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg61_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg62_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg63_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg64_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg65_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg66_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8" }, "d0_arg67_1": { "shape": [ 128, 2048 ], "dtype": "bool" }, "d0_arg68_1": { "shape": [ 128, 1 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c1": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c2": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c3": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c4": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c5": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c6": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c7": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c8": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c9": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c10": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c11": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c12": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c13": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c14": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c15": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c16": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c17": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c18": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c19": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c20": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c21": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c22": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c23": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c24": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c25": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c26": { "shape": [ 128, 1, 4096 ], "dtype": "bf16" }, "submod_d0_c27": { "shape": [ 128, 1, 50401 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg6_1", "d0_arg7_1", "d0_arg8_1", "d0_arg9_1", "d0_arg10_1", "d0_arg11_1", "d0_arg12_1", "d0_arg13_1", "d0_arg14_1", "d0_arg15_1", "d0_arg16_1", "d0_arg17_1", "d0_arg18_1", "d0_arg19_1", "d0_arg20_1", "d0_arg21_1", "d0_arg22_1", "d0_arg23_1", "d0_arg24_1", "d0_arg25_1", "d0_arg26_1", "d0_arg27_1", "d0_arg28_1", "d0_arg29_1", "d0_arg30_1", "d0_arg31_1", "d0_arg32_1", "d0_arg33_1", "d0_arg34_1", "d0_arg35_1", "d0_arg36_1", "d0_arg37_1", "d0_arg38_1", "d0_arg39_1", "d0_arg40_1", "d0_arg41_1", "d0_arg42_1", "d0_arg43_1", "d0_arg44_1", "d0_arg45_1", "d0_arg46_1", "d0_arg47_1", "d0_arg48_1", "d0_arg49_1", "d0_arg50_1", "d0_arg51_1", "d0_arg52_1", "d0_arg53_1", "d0_arg54_1", "d0_arg55_1", "d0_arg56_1", "d0_arg57_1", "d0_arg58_1", "d0_arg59_1", "d0_arg60_1", "d0_arg61_1", "d0_arg62_1", "d0_arg63_1", "d0_arg64_1", "d0_arg65_1", "d0_arg66_1", "d0_arg67_1", "d0_arg68_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c27" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg10_1", "d0_arg67_1", "_param_constant0_r0", "_param_constant1_r0", "_param_constant2_r0", "_tensor_constant0_r0", "_param_constant3_r0", "_tensor_constant1_r0", "_tensor_constant3_r0", "_param_constant4_r0", "_tensor_constant4_r0", "_tensor_constant6_r0", "_param_constant5_r0", "_tensor_constant7_r0", "_tensor_constant8_r0", "_tensor_constant9_r0", "d0_arg68_1", "d0_arg11_1", "d0_arg6_1", "d0_arg12_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant18_r0", "_param_constant6_r0", "_tensor_constant19_r0", "_tensor_constant21_r0", "_param_constant7_r0", "_tensor_constant22_r0", "_tensor_constant24_r0", "_tensor_constant25_r0", "_param_constant8_r0", "_tensor_constant26_r0", "_tensor_constant28_r0" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "ce58ee1c4d1f21799982509932512df0" }, "3": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c0", "_param_constant9_r0", "_param_constant10_r0", "_tensor_constant29_r0", "_param_constant11_r0", "_tensor_constant30_r0", "_tensor_constant32_r0", "_param_constant12_r0", "_tensor_constant33_r0", "_tensor_constant35_r0", "_param_constant13_r0", "_tensor_constant36_r0", "_tensor_constant37_r0", "_tensor_constant38_r0", "d0_arg68_1", "_tensor_constant39_r0", "_tensor_constant40_r0", "_tensor_constant41_r0", "_tensor_constant42_r0", "d0_arg13_1", "d0_arg6_1", "d0_arg14_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant44_r0", "_tensor_constant46_r0", "_tensor_constant47_r0", "_param_constant14_r0", "_tensor_constant48_r0", "_tensor_constant50_r0", "_param_constant15_r0", "_tensor_constant51_r0", "_tensor_constant53_r0", "_tensor_constant54_r0", "_param_constant16_r0", "_tensor_constant55_r0", "_tensor_constant57_r0" ], "outputs": [ "submod_d0_c1" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "4": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c1", "_param_constant17_r0", "_param_constant18_r0", "_tensor_constant58_r0", "_param_constant19_r0", "_tensor_constant59_r0", "_tensor_constant61_r0", "_param_constant20_r0", "_tensor_constant62_r0", "_tensor_constant64_r0", "_param_constant21_r0", "_tensor_constant65_r0", "_tensor_constant66_r0", "_tensor_constant67_r0", "d0_arg68_1", "_tensor_constant68_r0", "_tensor_constant69_r0", "_tensor_constant70_r0", "_tensor_constant71_r0", "d0_arg15_1", "d0_arg6_1", "d0_arg16_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant73_r0", "_tensor_constant75_r0", "_tensor_constant76_r0", "_param_constant22_r0", "_tensor_constant77_r0", "_tensor_constant79_r0", "_param_constant23_r0", "_tensor_constant80_r0", "_tensor_constant82_r0", "_tensor_constant83_r0", "_param_constant24_r0", "_tensor_constant84_r0", "_tensor_constant86_r0" ], "outputs": [ "submod_d0_c2" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "5": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c2", "_param_constant25_r0", "_param_constant26_r0", "_tensor_constant87_r0", "_param_constant27_r0", "_tensor_constant88_r0", "_tensor_constant90_r0", "_param_constant28_r0", "_tensor_constant91_r0", "_tensor_constant93_r0", "_param_constant29_r0", "_tensor_constant94_r0", "_tensor_constant95_r0", "_tensor_constant96_r0", "d0_arg68_1", "_tensor_constant97_r0", "_tensor_constant98_r0", "_tensor_constant99_r0", "_tensor_constant100_r0", "d0_arg17_1", "d0_arg6_1", "d0_arg18_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant102_r0", "_tensor_constant104_r0", "_tensor_constant105_r0", "_param_constant30_r0", "_tensor_constant106_r0", "_tensor_constant108_r0", "_param_constant31_r0", "_tensor_constant109_r0", "_tensor_constant111_r0", "_tensor_constant112_r0", "_param_constant32_r0", "_tensor_constant113_r0", "_tensor_constant115_r0" ], "outputs": [ "submod_d0_c3" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "6": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c3", "_param_constant33_r0", "_param_constant34_r0", "_tensor_constant116_r0", "_param_constant35_r0", "_tensor_constant117_r0", "_tensor_constant119_r0", "_param_constant36_r0", "_tensor_constant120_r0", "_tensor_constant122_r0", "_param_constant37_r0", "_tensor_constant123_r0", "_tensor_constant124_r0", "_tensor_constant125_r0", "d0_arg68_1", "_tensor_constant126_r0", "_tensor_constant127_r0", "_tensor_constant128_r0", "_tensor_constant129_r0", "d0_arg19_1", "d0_arg6_1", "d0_arg20_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant131_r0", "_tensor_constant133_r0", "_tensor_constant134_r0", "_param_constant38_r0", "_tensor_constant135_r0", "_tensor_constant137_r0", "_param_constant39_r0", "_tensor_constant138_r0", "_tensor_constant140_r0", "_tensor_constant141_r0", "_param_constant40_r0", "_tensor_constant142_r0", "_tensor_constant144_r0" ], "outputs": [ "submod_d0_c4" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "7": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c4", "_param_constant41_r0", "_param_constant42_r0", "_tensor_constant145_r0", "_param_constant43_r0", "_tensor_constant146_r0", "_tensor_constant148_r0", "_param_constant44_r0", "_tensor_constant149_r0", "_tensor_constant151_r0", "_param_constant45_r0", "_tensor_constant152_r0", "_tensor_constant153_r0", "_tensor_constant154_r0", "d0_arg68_1", "_tensor_constant155_r0", "_tensor_constant156_r0", "_tensor_constant157_r0", "_tensor_constant158_r0", "d0_arg21_1", "d0_arg6_1", "d0_arg22_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant160_r0", "_tensor_constant162_r0", "_tensor_constant163_r0", "_param_constant46_r0", "_tensor_constant164_r0", "_tensor_constant166_r0", "_param_constant47_r0", "_tensor_constant167_r0", "_tensor_constant169_r0", "_tensor_constant170_r0", "_param_constant48_r0", "_tensor_constant171_r0", "_tensor_constant173_r0" ], "outputs": [ "submod_d0_c5" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "8": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c5", "_param_constant49_r0", "_param_constant50_r0", "_tensor_constant174_r0", "_param_constant51_r0", "_tensor_constant175_r0", "_tensor_constant177_r0", "_param_constant52_r0", "_tensor_constant178_r0", "_tensor_constant180_r0", "_param_constant53_r0", "_tensor_constant181_r0", "_tensor_constant182_r0", "_tensor_constant183_r0", "d0_arg68_1", "_tensor_constant184_r0", "_tensor_constant185_r0", "_tensor_constant186_r0", "_tensor_constant187_r0", "d0_arg23_1", "d0_arg6_1", "d0_arg24_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant189_r0", "_tensor_constant191_r0", "_tensor_constant192_r0", "_param_constant54_r0", "_tensor_constant193_r0", "_tensor_constant195_r0", "_param_constant55_r0", "_tensor_constant196_r0", "_tensor_constant198_r0", "_tensor_constant199_r0", "_param_constant56_r0", "_tensor_constant200_r0", "_tensor_constant202_r0" ], "outputs": [ "submod_d0_c6" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "9": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c6", "_param_constant57_r0", "_param_constant58_r0", "_tensor_constant203_r0", "_param_constant59_r0", "_tensor_constant204_r0", "_tensor_constant206_r0", "_param_constant60_r0", "_tensor_constant207_r0", "_tensor_constant209_r0", "_param_constant61_r0", "_tensor_constant210_r0", "_tensor_constant211_r0", "_tensor_constant212_r0", "d0_arg68_1", "_tensor_constant213_r0", "_tensor_constant214_r0", "_tensor_constant215_r0", "_tensor_constant216_r0", "d0_arg25_1", "d0_arg6_1", "d0_arg26_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant218_r0", "_tensor_constant220_r0", "_tensor_constant221_r0", "_param_constant62_r0", "_tensor_constant222_r0", "_tensor_constant224_r0", "_param_constant63_r0", "_tensor_constant225_r0", "_tensor_constant227_r0", "_tensor_constant228_r0", "_param_constant64_r0", "_tensor_constant229_r0", "_tensor_constant231_r0" ], "outputs": [ "submod_d0_c7" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "10": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c7", "_param_constant65_r0", "_param_constant66_r0", "_tensor_constant232_r0", "_param_constant67_r0", "_tensor_constant233_r0", "_tensor_constant235_r0", "_param_constant68_r0", "_tensor_constant236_r0", "_tensor_constant238_r0", "_param_constant69_r0", "_tensor_constant239_r0", "_tensor_constant240_r0", "_tensor_constant241_r0", "d0_arg68_1", "_tensor_constant242_r0", "_tensor_constant243_r0", "_tensor_constant244_r0", "_tensor_constant245_r0", "d0_arg27_1", "d0_arg6_1", "d0_arg28_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant247_r0", "_tensor_constant249_r0", "_tensor_constant250_r0", "_param_constant70_r0", "_tensor_constant251_r0", "_tensor_constant253_r0", "_param_constant71_r0", "_tensor_constant254_r0", "_tensor_constant256_r0", "_tensor_constant257_r0", "_param_constant72_r0", "_tensor_constant258_r0", "_tensor_constant260_r0" ], "outputs": [ "submod_d0_c8" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "11": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c8", "_param_constant73_r0", "_param_constant74_r0", "_tensor_constant261_r0", "_param_constant75_r0", "_tensor_constant262_r0", "_tensor_constant264_r0", "_param_constant76_r0", "_tensor_constant265_r0", "_tensor_constant267_r0", "_param_constant77_r0", "_tensor_constant268_r0", "_tensor_constant269_r0", "_tensor_constant270_r0", "d0_arg68_1", "_tensor_constant271_r0", "_tensor_constant272_r0", "_tensor_constant273_r0", "_tensor_constant274_r0", "d0_arg29_1", "d0_arg6_1", "d0_arg30_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant276_r0", "_tensor_constant278_r0", "_tensor_constant279_r0", "_param_constant78_r0", "_tensor_constant280_r0", "_tensor_constant282_r0", "_param_constant79_r0", "_tensor_constant283_r0", "_tensor_constant285_r0", "_tensor_constant286_r0", "_param_constant80_r0", "_tensor_constant287_r0", "_tensor_constant289_r0" ], "outputs": [ "submod_d0_c9" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "12": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c9", "_param_constant81_r0", "_param_constant82_r0", "_tensor_constant290_r0", "_param_constant83_r0", "_tensor_constant291_r0", "_tensor_constant293_r0", "_param_constant84_r0", "_tensor_constant294_r0", "_tensor_constant296_r0", "_param_constant85_r0", "_tensor_constant297_r0", "_tensor_constant298_r0", "_tensor_constant299_r0", "d0_arg68_1", "_tensor_constant300_r0", "_tensor_constant301_r0", "_tensor_constant302_r0", "_tensor_constant303_r0", "d0_arg31_1", "d0_arg6_1", "d0_arg32_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant305_r0", "_tensor_constant307_r0", "_tensor_constant308_r0", "_param_constant86_r0", "_tensor_constant309_r0", "_tensor_constant311_r0", "_param_constant87_r0", "_tensor_constant312_r0", "_tensor_constant314_r0", "_tensor_constant315_r0", "_param_constant88_r0", "_tensor_constant316_r0", "_tensor_constant318_r0" ], "outputs": [ "submod_d0_c10" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "13": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c10", "_param_constant89_r0", "_param_constant90_r0", "_tensor_constant319_r0", "_param_constant91_r0", "_tensor_constant320_r0", "_tensor_constant322_r0", "_param_constant92_r0", "_tensor_constant323_r0", "_tensor_constant325_r0", "_param_constant93_r0", "_tensor_constant326_r0", "_tensor_constant327_r0", "_tensor_constant328_r0", "d0_arg68_1", "_tensor_constant329_r0", "_tensor_constant330_r0", "_tensor_constant331_r0", "_tensor_constant332_r0", "d0_arg33_1", "d0_arg6_1", "d0_arg34_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant334_r0", "_tensor_constant336_r0", "_tensor_constant337_r0", "_param_constant94_r0", "_tensor_constant338_r0", "_tensor_constant340_r0", "_param_constant95_r0", "_tensor_constant341_r0", "_tensor_constant343_r0", "_tensor_constant344_r0", "_param_constant96_r0", "_tensor_constant345_r0", "_tensor_constant347_r0" ], "outputs": [ "submod_d0_c11" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "14": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c11", "_param_constant97_r0", "_param_constant98_r0", "_tensor_constant348_r0", "_param_constant99_r0", "_tensor_constant349_r0", "_tensor_constant351_r0", "_param_constant100_r0", "_tensor_constant352_r0", "_tensor_constant354_r0", "_param_constant101_r0", "_tensor_constant355_r0", "_tensor_constant356_r0", "_tensor_constant357_r0", "d0_arg68_1", "_tensor_constant358_r0", "_tensor_constant359_r0", "_tensor_constant360_r0", "_tensor_constant361_r0", "d0_arg35_1", "d0_arg6_1", "d0_arg36_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant363_r0", "_tensor_constant365_r0", "_tensor_constant366_r0", "_param_constant102_r0", "_tensor_constant367_r0", "_tensor_constant369_r0", "_param_constant103_r0", "_tensor_constant370_r0", "_tensor_constant372_r0", "_tensor_constant373_r0", "_param_constant104_r0", "_tensor_constant374_r0", "_tensor_constant376_r0" ], "outputs": [ "submod_d0_c12" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "15": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c12", "_param_constant105_r0", "_param_constant106_r0", "_tensor_constant377_r0", "_param_constant107_r0", "_tensor_constant378_r0", "_tensor_constant380_r0", "_param_constant108_r0", "_tensor_constant381_r0", "_tensor_constant383_r0", "_param_constant109_r0", "_tensor_constant384_r0", "_tensor_constant385_r0", "_tensor_constant386_r0", "d0_arg68_1", "_tensor_constant387_r0", "_tensor_constant388_r0", "_tensor_constant389_r0", "_tensor_constant390_r0", "d0_arg37_1", "d0_arg6_1", "d0_arg38_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant392_r0", "_tensor_constant394_r0", "_tensor_constant395_r0", "_param_constant110_r0", "_tensor_constant396_r0", "_tensor_constant398_r0", "_param_constant111_r0", "_tensor_constant399_r0", "_tensor_constant401_r0", "_tensor_constant402_r0", "_param_constant112_r0", "_tensor_constant403_r0", "_tensor_constant405_r0" ], "outputs": [ "submod_d0_c13" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "16": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c13", "_param_constant113_r0", "_param_constant114_r0", "_tensor_constant406_r0", "_param_constant115_r0", "_tensor_constant407_r0", "_tensor_constant409_r0", "_param_constant116_r0", "_tensor_constant410_r0", "_tensor_constant412_r0", "_param_constant117_r0", "_tensor_constant413_r0", "_tensor_constant414_r0", "_tensor_constant415_r0", "d0_arg68_1", "_tensor_constant416_r0", "_tensor_constant417_r0", "_tensor_constant418_r0", "_tensor_constant419_r0", "d0_arg39_1", "d0_arg6_1", "d0_arg40_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant421_r0", "_tensor_constant423_r0", "_tensor_constant424_r0", "_param_constant118_r0", "_tensor_constant425_r0", "_tensor_constant427_r0", "_param_constant119_r0", "_tensor_constant428_r0", "_tensor_constant430_r0", "_tensor_constant431_r0", "_param_constant120_r0", "_tensor_constant432_r0", "_tensor_constant434_r0" ], "outputs": [ "submod_d0_c14" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "17": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c14", "_param_constant121_r0", "_param_constant122_r0", "_tensor_constant435_r0", "_param_constant123_r0", "_tensor_constant436_r0", "_tensor_constant438_r0", "_param_constant124_r0", "_tensor_constant439_r0", "_tensor_constant441_r0", "_param_constant125_r0", "_tensor_constant442_r0", "_tensor_constant443_r0", "_tensor_constant444_r0", "d0_arg68_1", "_tensor_constant445_r0", "_tensor_constant446_r0", "_tensor_constant447_r0", "_tensor_constant448_r0", "d0_arg41_1", "d0_arg6_1", "d0_arg42_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant450_r0", "_tensor_constant452_r0", "_tensor_constant453_r0", "_param_constant126_r0", "_tensor_constant454_r0", "_tensor_constant456_r0", "_param_constant127_r0", "_tensor_constant457_r0", "_tensor_constant459_r0", "_tensor_constant460_r0", "_param_constant128_r0", "_tensor_constant461_r0", "_tensor_constant463_r0" ], "outputs": [ "submod_d0_c15" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "18": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c15", "_param_constant129_r0", "_param_constant130_r0", "_tensor_constant464_r0", "_param_constant131_r0", "_tensor_constant465_r0", "_tensor_constant467_r0", "_param_constant132_r0", "_tensor_constant468_r0", "_tensor_constant470_r0", "_param_constant133_r0", "_tensor_constant471_r0", "_tensor_constant472_r0", "_tensor_constant473_r0", "d0_arg68_1", "_tensor_constant474_r0", "_tensor_constant475_r0", "_tensor_constant476_r0", "_tensor_constant477_r0", "d0_arg43_1", "d0_arg6_1", "d0_arg44_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant479_r0", "_tensor_constant481_r0", "_tensor_constant482_r0", "_param_constant134_r0", "_tensor_constant483_r0", "_tensor_constant485_r0", "_param_constant135_r0", "_tensor_constant486_r0", "_tensor_constant488_r0", "_tensor_constant489_r0", "_param_constant136_r0", "_tensor_constant490_r0", "_tensor_constant492_r0" ], "outputs": [ "submod_d0_c16" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "19": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c16", "_param_constant137_r0", "_param_constant138_r0", "_tensor_constant493_r0", "_param_constant139_r0", "_tensor_constant494_r0", "_tensor_constant496_r0", "_param_constant140_r0", "_tensor_constant497_r0", "_tensor_constant499_r0", "_param_constant141_r0", "_tensor_constant500_r0", "_tensor_constant501_r0", "_tensor_constant502_r0", "d0_arg68_1", "_tensor_constant503_r0", "_tensor_constant504_r0", "_tensor_constant505_r0", "_tensor_constant506_r0", "d0_arg45_1", "d0_arg6_1", "d0_arg46_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant508_r0", "_tensor_constant510_r0", "_tensor_constant511_r0", "_param_constant142_r0", "_tensor_constant512_r0", "_tensor_constant514_r0", "_param_constant143_r0", "_tensor_constant515_r0", "_tensor_constant517_r0", "_tensor_constant518_r0", "_param_constant144_r0", "_tensor_constant519_r0", "_tensor_constant521_r0" ], "outputs": [ "submod_d0_c17" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "20": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c17", "_param_constant145_r0", "_param_constant146_r0", "_tensor_constant522_r0", "_param_constant147_r0", "_tensor_constant523_r0", "_tensor_constant525_r0", "_param_constant148_r0", "_tensor_constant526_r0", "_tensor_constant528_r0", "_param_constant149_r0", "_tensor_constant529_r0", "_tensor_constant530_r0", "_tensor_constant531_r0", "d0_arg68_1", "_tensor_constant532_r0", "_tensor_constant533_r0", "_tensor_constant534_r0", "_tensor_constant535_r0", "d0_arg47_1", "d0_arg6_1", "d0_arg48_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant537_r0", "_tensor_constant539_r0", "_tensor_constant540_r0", "_param_constant150_r0", "_tensor_constant541_r0", "_tensor_constant543_r0", "_param_constant151_r0", "_tensor_constant544_r0", "_tensor_constant546_r0", "_tensor_constant547_r0", "_param_constant152_r0", "_tensor_constant548_r0", "_tensor_constant550_r0" ], "outputs": [ "submod_d0_c18" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "21": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c18", "_param_constant153_r0", "_param_constant154_r0", "_tensor_constant551_r0", "_param_constant155_r0", "_tensor_constant552_r0", "_tensor_constant554_r0", "_param_constant156_r0", "_tensor_constant555_r0", "_tensor_constant557_r0", "_param_constant157_r0", "_tensor_constant558_r0", "_tensor_constant559_r0", "_tensor_constant560_r0", "d0_arg68_1", "_tensor_constant561_r0", "_tensor_constant562_r0", "_tensor_constant563_r0", "_tensor_constant564_r0", "d0_arg49_1", "d0_arg6_1", "d0_arg50_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant566_r0", "_tensor_constant568_r0", "_tensor_constant569_r0", "_param_constant158_r0", "_tensor_constant570_r0", "_tensor_constant572_r0", "_param_constant159_r0", "_tensor_constant573_r0", "_tensor_constant575_r0", "_tensor_constant576_r0", "_param_constant160_r0", "_tensor_constant577_r0", "_tensor_constant579_r0" ], "outputs": [ "submod_d0_c19" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "22": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c19", "_param_constant161_r0", "_param_constant162_r0", "_tensor_constant580_r0", "_param_constant163_r0", "_tensor_constant581_r0", "_tensor_constant583_r0", "_param_constant164_r0", "_tensor_constant584_r0", "_tensor_constant586_r0", "_param_constant165_r0", "_tensor_constant587_r0", "_tensor_constant588_r0", "_tensor_constant589_r0", "d0_arg68_1", "_tensor_constant590_r0", "_tensor_constant591_r0", "_tensor_constant592_r0", "_tensor_constant593_r0", "d0_arg51_1", "d0_arg6_1", "d0_arg52_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant595_r0", "_tensor_constant597_r0", "_tensor_constant598_r0", "_param_constant166_r0", "_tensor_constant599_r0", "_tensor_constant601_r0", "_param_constant167_r0", "_tensor_constant602_r0", "_tensor_constant604_r0", "_tensor_constant605_r0", "_param_constant168_r0", "_tensor_constant606_r0", "_tensor_constant608_r0" ], "outputs": [ "submod_d0_c20" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "23": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c20", "_param_constant169_r0", "_param_constant170_r0", "_tensor_constant609_r0", "_param_constant171_r0", "_tensor_constant610_r0", "_tensor_constant612_r0", "_param_constant172_r0", "_tensor_constant613_r0", "_tensor_constant615_r0", "_param_constant173_r0", "_tensor_constant616_r0", "_tensor_constant617_r0", "_tensor_constant618_r0", "d0_arg68_1", "_tensor_constant619_r0", "_tensor_constant620_r0", "_tensor_constant621_r0", "_tensor_constant622_r0", "d0_arg53_1", "d0_arg6_1", "d0_arg54_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant624_r0", "_tensor_constant626_r0", "_tensor_constant627_r0", "_param_constant174_r0", "_tensor_constant628_r0", "_tensor_constant630_r0", "_param_constant175_r0", "_tensor_constant631_r0", "_tensor_constant633_r0", "_tensor_constant634_r0", "_param_constant176_r0", "_tensor_constant635_r0", "_tensor_constant637_r0" ], "outputs": [ "submod_d0_c21" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "24": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c21", "_param_constant177_r0", "_param_constant178_r0", "_tensor_constant638_r0", "_param_constant179_r0", "_tensor_constant639_r0", "_tensor_constant641_r0", "_param_constant180_r0", "_tensor_constant642_r0", "_tensor_constant644_r0", "_param_constant181_r0", "_tensor_constant645_r0", "_tensor_constant646_r0", "_tensor_constant647_r0", "d0_arg68_1", "_tensor_constant648_r0", "_tensor_constant649_r0", "_tensor_constant650_r0", "_tensor_constant651_r0", "d0_arg55_1", "d0_arg6_1", "d0_arg56_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant653_r0", "_tensor_constant655_r0", "_tensor_constant656_r0", "_param_constant182_r0", "_tensor_constant657_r0", "_tensor_constant659_r0", "_param_constant183_r0", "_tensor_constant660_r0", "_tensor_constant662_r0", "_tensor_constant663_r0", "_param_constant184_r0", "_tensor_constant664_r0", "_tensor_constant666_r0" ], "outputs": [ "submod_d0_c22" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "25": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c22", "_param_constant185_r0", "_param_constant186_r0", "_tensor_constant667_r0", "_param_constant187_r0", "_tensor_constant668_r0", "_tensor_constant670_r0", "_param_constant188_r0", "_tensor_constant671_r0", "_tensor_constant673_r0", "_param_constant189_r0", "_tensor_constant674_r0", "_tensor_constant675_r0", "_tensor_constant676_r0", "d0_arg68_1", "_tensor_constant677_r0", "_tensor_constant678_r0", "_tensor_constant679_r0", "_tensor_constant680_r0", "d0_arg57_1", "d0_arg6_1", "d0_arg58_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant682_r0", "_tensor_constant684_r0", "_tensor_constant685_r0", "_param_constant190_r0", "_tensor_constant686_r0", "_tensor_constant688_r0", "_param_constant191_r0", "_tensor_constant689_r0", "_tensor_constant691_r0", "_tensor_constant692_r0", "_param_constant192_r0", "_tensor_constant693_r0", "_tensor_constant695_r0" ], "outputs": [ "submod_d0_c23" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "26": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c23", "_param_constant193_r0", "_param_constant194_r0", "_tensor_constant696_r0", "_param_constant195_r0", "_tensor_constant697_r0", "_tensor_constant699_r0", "_param_constant196_r0", "_tensor_constant700_r0", "_tensor_constant702_r0", "_param_constant197_r0", "_tensor_constant703_r0", "_tensor_constant704_r0", "_tensor_constant705_r0", "d0_arg68_1", "_tensor_constant706_r0", "_tensor_constant707_r0", "_tensor_constant708_r0", "_tensor_constant709_r0", "d0_arg59_1", "d0_arg6_1", "d0_arg60_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant711_r0", "_tensor_constant713_r0", "_tensor_constant714_r0", "_param_constant198_r0", "_tensor_constant715_r0", "_tensor_constant717_r0", "_param_constant199_r0", "_tensor_constant718_r0", "_tensor_constant720_r0", "_tensor_constant721_r0", "_param_constant200_r0", "_tensor_constant722_r0", "_tensor_constant724_r0" ], "outputs": [ "submod_d0_c24" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "27": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c24", "_param_constant201_r0", "_param_constant202_r0", "_tensor_constant725_r0", "_param_constant203_r0", "_tensor_constant726_r0", "_tensor_constant728_r0", "_param_constant204_r0", "_tensor_constant729_r0", "_tensor_constant731_r0", "_param_constant205_r0", "_tensor_constant732_r0", "_tensor_constant733_r0", "_tensor_constant734_r0", "d0_arg68_1", "_tensor_constant735_r0", "_tensor_constant736_r0", "_tensor_constant737_r0", "_tensor_constant738_r0", "d0_arg61_1", "d0_arg6_1", "d0_arg62_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant740_r0", "_tensor_constant742_r0", "_tensor_constant743_r0", "_param_constant206_r0", "_tensor_constant744_r0", "_tensor_constant746_r0", "_param_constant207_r0", "_tensor_constant747_r0", "_tensor_constant749_r0", "_tensor_constant750_r0", "_param_constant208_r0", "_tensor_constant751_r0", "_tensor_constant753_r0" ], "outputs": [ "submod_d0_c25" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "28": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c25", "_param_constant209_r0", "_param_constant210_r0", "_tensor_constant754_r0", "_param_constant211_r0", "_tensor_constant755_r0", "_tensor_constant757_r0", "_param_constant212_r0", "_tensor_constant758_r0", "_tensor_constant760_r0", "_param_constant213_r0", "_tensor_constant761_r0", "_tensor_constant762_r0", "_tensor_constant763_r0", "d0_arg68_1", "_tensor_constant764_r0", "_tensor_constant765_r0", "_tensor_constant766_r0", "_tensor_constant767_r0", "d0_arg63_1", "d0_arg6_1", "d0_arg64_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant769_r0", "_tensor_constant771_r0", "_tensor_constant772_r0", "_param_constant214_r0", "_tensor_constant773_r0", "_tensor_constant775_r0", "_param_constant215_r0", "_tensor_constant776_r0", "_tensor_constant778_r0", "_tensor_constant779_r0", "_param_constant216_r0", "_tensor_constant780_r0", "_tensor_constant782_r0" ], "outputs": [ "submod_d0_c26" ], "device": "0", "data": null, "data_blob": "25d8f41fa7286a7890ecab1bfe3c2773" }, "29": { "kind": "edf", "inputs": [ "d0_arg67_1", "submod_d0_c26", "_param_constant217_r0", "_param_constant218_r0", "_tensor_constant783_r0", "_param_constant219_r0", "_tensor_constant784_r0", "_tensor_constant786_r0", "_param_constant220_r0", "_tensor_constant787_r0", "_tensor_constant789_r0", "_param_constant221_r0", "_tensor_constant790_r0", "_tensor_constant791_r0", "_tensor_constant792_r0", "d0_arg68_1", "d0_arg65_1", "d0_arg6_1", "d0_arg66_1", "d0_arg8_1", "d0_arg7_1", "d0_arg9_1", "d0_arg0_1", "d0_arg1_1", "_tensor_constant801_r0", "_param_constant222_r0", "_tensor_constant802_r0", "_tensor_constant804_r0", "_param_constant223_r0", "_tensor_constant805_r0", "_tensor_constant807_r0", "_tensor_constant808_r0", "_param_constant224_r0", "_tensor_constant809_r0", "_tensor_constant811_r0", "_param_constant225_r0", "_param_constant226_r0", "_tensor_constant812_r0", "_param_constant227_r0", "_tensor_constant813_r0", "_tensor_constant815_r0" ], "outputs": [ "submod_d0_c27" ], "device": "0", "data": null, "data_blob": "8ea114c1cd9f77b4d76203fa38968343" } }, "metadata": { "tensors": { "inputs": { "new_key_location": { "shape": [ 128, 1 ], "dtype": "i32", "idx": 0 }, "new_value_location": { "shape": [ 128, 1 ], "dtype": "i32", "idx": 1 }, "past_valid_key_prompt_indices": { "shape": [ 61440 ], "dtype": "i32", "idx": 2 }, "past_valid_key_decode_indices": { "shape": [ 16256 ], "dtype": "i32", "idx": 3 }, "past_valid_value_prompt_indices": { "shape": [ 61440 ], "dtype": "i32", "idx": 4 }, "past_valid_value_decode_indices": { "shape": [ 16256 ], "dtype": "i32", "idx": 5 }, "input_ids": { "shape": [ 128, 1 ], "dtype": "i32", "idx": 6 }, "past_key_values_0_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 7 }, "past_key_values_0_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 8 }, "past_key_values_1_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 9 }, "past_key_values_1_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 10 }, "past_key_values_2_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 11 }, "past_key_values_2_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 12 }, "past_key_values_3_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 13 }, "past_key_values_3_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 14 }, "past_key_values_4_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 15 }, "past_key_values_4_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 16 }, "past_key_values_5_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 17 }, "past_key_values_5_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 18 }, "past_key_values_6_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 19 }, "past_key_values_6_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 20 }, "past_key_values_7_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 21 }, "past_key_values_7_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 22 }, "past_key_values_8_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 23 }, "past_key_values_8_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 24 }, "past_key_values_9_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 25 }, "past_key_values_9_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 26 }, "past_key_values_10_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 27 }, "past_key_values_10_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 28 }, "past_key_values_11_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 29 }, "past_key_values_11_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 30 }, "past_key_values_12_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 31 }, "past_key_values_12_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 32 }, "past_key_values_13_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 33 }, "past_key_values_13_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 34 }, "past_key_values_14_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 35 }, "past_key_values_14_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 36 }, "past_key_values_15_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 37 }, "past_key_values_15_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 38 }, "past_key_values_16_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 39 }, "past_key_values_16_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 40 }, "past_key_values_17_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 41 }, "past_key_values_17_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 42 }, "past_key_values_18_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 43 }, "past_key_values_18_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 44 }, "past_key_values_19_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 45 }, "past_key_values_19_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 46 }, "past_key_values_20_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 47 }, "past_key_values_20_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 48 }, "past_key_values_21_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 49 }, "past_key_values_21_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 50 }, "past_key_values_22_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 51 }, "past_key_values_22_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 52 }, "past_key_values_23_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 53 }, "past_key_values_23_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 54 }, "past_key_values_24_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 55 }, "past_key_values_24_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 56 }, "past_key_values_25_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 57 }, "past_key_values_25_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 58 }, "past_key_values_26_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 59 }, "past_key_values_26_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 60 }, "past_key_values_27_0": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 61 }, "past_key_values_27_1": { "shape": [ 128, 1, 16, 256 ], "dtype": "i8", "idx": 62 }, "attention_mask": { "shape": [ 128, 2048 ], "dtype": "bool", "idx": 63 }, "position_ids": { "shape": [ 128, 1 ], "dtype": "i32", "idx": 64 } }, "outputs": { "logits": { "shape": [ 128, 1, 50401 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 128 ], [ 0, 1 ] ], "origin": "new_key_location", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 128 ], [ 0, 1 ] ], "origin": "new_value_location", "dtype": "i32", "device": "0" }, "d0_arg6_1": { "placements": [ [ 0, 61440 ] ], "origin": "past_valid_key_prompt_indices", "dtype": "i32", "device": "0" }, "d0_arg7_1": { "placements": [ [ 0, 16256 ] ], "origin": "past_valid_key_decode_indices", "dtype": "i32", "device": "0" }, "d0_arg8_1": { "placements": [ [ 0, 61440 ] ], "origin": "past_valid_value_prompt_indices", "dtype": "i32", "device": "0" }, "d0_arg9_1": { "placements": [ [ 0, 16256 ] ], "origin": "past_valid_value_decode_indices", "dtype": "i32", "device": "0" }, "d0_arg10_1": { "placements": [ [ 0, 128 ], [ 0, 1 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg11_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_0", "dtype": "i8", "device": "0" }, "d0_arg12_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_0_1", "dtype": "i8", "device": "0" }, "d0_arg13_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_0", "dtype": "i8", "device": "0" }, "d0_arg14_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_1_1", "dtype": "i8", "device": "0" }, "d0_arg15_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_0", "dtype": "i8", "device": "0" }, "d0_arg16_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_2_1", "dtype": "i8", "device": "0" }, "d0_arg17_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_0", "dtype": "i8", "device": "0" }, "d0_arg18_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_3_1", "dtype": "i8", "device": "0" }, "d0_arg19_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_0", "dtype": "i8", "device": "0" }, "d0_arg20_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_4_1", "dtype": "i8", "device": "0" }, "d0_arg21_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_0", "dtype": "i8", "device": "0" }, "d0_arg22_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_5_1", "dtype": "i8", "device": "0" }, "d0_arg23_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_0", "dtype": "i8", "device": "0" }, "d0_arg24_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_6_1", "dtype": "i8", "device": "0" }, "d0_arg25_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_0", "dtype": "i8", "device": "0" }, "d0_arg26_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_7_1", "dtype": "i8", "device": "0" }, "d0_arg27_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_0", "dtype": "i8", "device": "0" }, "d0_arg28_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_8_1", "dtype": "i8", "device": "0" }, "d0_arg29_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_0", "dtype": "i8", "device": "0" }, "d0_arg30_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_9_1", "dtype": "i8", "device": "0" }, "d0_arg31_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_0", "dtype": "i8", "device": "0" }, "d0_arg32_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_10_1", "dtype": "i8", "device": "0" }, "d0_arg33_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_0", "dtype": "i8", "device": "0" }, "d0_arg34_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_11_1", "dtype": "i8", "device": "0" }, "d0_arg35_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_0", "dtype": "i8", "device": "0" }, "d0_arg36_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_12_1", "dtype": "i8", "device": "0" }, "d0_arg37_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_0", "dtype": "i8", "device": "0" }, "d0_arg38_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_13_1", "dtype": "i8", "device": "0" }, "d0_arg39_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_0", "dtype": "i8", "device": "0" }, "d0_arg40_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_14_1", "dtype": "i8", "device": "0" }, "d0_arg41_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_0", "dtype": "i8", "device": "0" }, "d0_arg42_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_15_1", "dtype": "i8", "device": "0" }, "d0_arg43_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_0", "dtype": "i8", "device": "0" }, "d0_arg44_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_16_1", "dtype": "i8", "device": "0" }, "d0_arg45_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_0", "dtype": "i8", "device": "0" }, "d0_arg46_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_17_1", "dtype": "i8", "device": "0" }, "d0_arg47_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_0", "dtype": "i8", "device": "0" }, "d0_arg48_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_18_1", "dtype": "i8", "device": "0" }, "d0_arg49_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_0", "dtype": "i8", "device": "0" }, "d0_arg50_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_19_1", "dtype": "i8", "device": "0" }, "d0_arg51_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_0", "dtype": "i8", "device": "0" }, "d0_arg52_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_20_1", "dtype": "i8", "device": "0" }, "d0_arg53_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_0", "dtype": "i8", "device": "0" }, "d0_arg54_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_21_1", "dtype": "i8", "device": "0" }, "d0_arg55_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_0", "dtype": "i8", "device": "0" }, "d0_arg56_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_22_1", "dtype": "i8", "device": "0" }, "d0_arg57_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_0", "dtype": "i8", "device": "0" }, "d0_arg58_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_23_1", "dtype": "i8", "device": "0" }, "d0_arg59_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_0", "dtype": "i8", "device": "0" }, "d0_arg60_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_24_1", "dtype": "i8", "device": "0" }, "d0_arg61_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_0", "dtype": "i8", "device": "0" }, "d0_arg62_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_25_1", "dtype": "i8", "device": "0" }, "d0_arg63_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_0", "dtype": "i8", "device": "0" }, "d0_arg64_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_26_1", "dtype": "i8", "device": "0" }, "d0_arg65_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_0", "dtype": "i8", "device": "0" }, "d0_arg66_1": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 16 ], [ 0, 256 ] ], "origin": "past_key_values_27_1", "dtype": "i8", "device": "0" }, "d0_arg67_1": { "placements": [ [ 0, 128 ], [ 0, 2048 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg68_1": { "placements": [ [ 0, 128 ], [ 0, 1 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c27": { "placements": [ [ 0, 128 ], [ 0, 1 ], [ 0, 50401 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "ce58ee1c4d1f21799982509932512df0": null, "25d8f41fa7286a7890ecab1bfe3c2773": null, "8ea114c1cd9f77b4d76203fa38968343": null }, "param_files": { "1": { "path": "add_const_file-Quantized_furiosa_llm_models.gptj.symbolic.mlperf_submission.GPTJForCausalLM-kv2047-b128-attn2048-0.safetensors", "format": "safetensors" }, "0": { "path": "params-mlperf-gpt-j-6b-mlperf_submission-28L-W8fA8fKV8f-allow_bfloat16_cast_with_mcp-67587dd9127e5f3be1ada1ba6db796c40ae4b7b67e15661687f3523a31e7be58.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" } ], "pipeline_metadata_list": [ { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 }, { "output_logits_size": 1 } ], "max_prompt_len": null }, "speculative_model": null, "version": { "major": 2, "minor": 0 }, "prefill_chunk_size": null }