{ "bos_token_id": 1, "class_token_index": 64402, "dropout": 0.1, "embed_ent_token": true, "encoder_config": { "transformers_version": "5.8.1", "architectures": [ "Lfm2BiModel" ], "output_hidden_states": false, "return_dict": true, "dtype": "float32", "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "vocab_size": 64404, "hidden_size": 1024, "intermediate_size": 6656, "num_hidden_layers": 16, "num_attention_heads": 16, "num_key_value_heads": 8, "max_position_embeddings": 128000, "initializer_range": 0.02, "norm_eps": 1e-05, "use_cache": true, "pad_token_id": 0, "bos_token_id": 1, "eos_token_id": 7, "tie_word_embeddings": true, "rope_parameters": { "rope_theta": 1000000.0, "rope_type": "default" }, "conv_bias": false, "conv_L_cache": 3, "block_multiple_of": 256, "block_ffn_dim_multiplier": 1.0, "block_auto_adjust_ff_dim": true, "full_attn_idxs": null, "layer_types": [ "conv", "conv", "full_attention", "conv", "conv", "full_attention", "conv", "conv", "full_attention", "conv", "full_attention", "conv", "full_attention", "conv", "full_attention", "conv" ], "_name_or_path": "/run/determined/workdir/mmontebovi/gliner_boost/models_lfm2_bi_mlm_v2/ckpt22500_backbone", "block_dim": 1024, "block_mlp_init_scale": 1.0, "block_norm_eps": 1e-05, "block_out_init_scale": 1.0, "block_use_swiglu": true, "block_use_xavier_init": true, "conv_dim": 1024, "conv_use_xavier_init": true, "model_type": "lfm2", "num_heads": 16, "use_pos_enc": true, "output_attentions": false }, "ent_token": "<<ENT>>", "eos_token_id": 7, "fine_tune": true, "fuse_layers": true, "hidden_size": 1024, "labels_decoder": null, "labels_encoder": null, "max_len": 1024, "max_neg_type_ratio": 1, "max_types": 100, "max_width": 12, "model_name": "/run/determined/workdir/mmontebovi/gliner_boost/models_lfm2_bi_mlm_v2/ckpt22500_backbone", "model_type": null, "moe_aux_loss_coef": 0.0, "moe_bilinear": false, 
"moe_bilinear_gate_init_std": 0.02, "moe_bilinear_init_std": 0.01, "moe_bilinear_num_experts": 8, "moe_bilinear_rank": 32, "moe_bilinear_reg_coef": 0.001, "moe_drop_upcycle_p": 0.5, "moe_expert_dim": null, "moe_gate_init_std": 0.02, "moe_num_experts": null, "moe_num_topics": null, "moe_post_encoder": false, "moe_residual_scale": 0.1, "moe_shared_expert_dim": null, "moe_top_k": 2, "moe_topic_loss_coef": 0.1, "moe_topic_routed": false, "moe_use_shared_expert": false, "moe_weight_scale_exp": 0.3333333333333333, "moe_zloss_coef": 0.0, "name": "LFM2.5-350M +MLM-v2(ckpt22500) DENSE GLiNER \u2014 STAGE 2", "neg_spans_ratio": 1.0, "num_post_fusion_layers": 1, "num_rnn_layers": 1, "pad_token_id": 0, "post_fusion_schema": null, "represent_spans": false, "sep_token": "<<SEP>>", "span_loss_coef": 1.0, "span_mode": "markerV1", "subtoken_pooling": "first", "token_loss_coef": 1.0, "transformers_version": "5.8.1", "use_cache": false, "vocab_size": 64404, "words_splitter_type": "whitespace" }