{
  "model_type": "bidirlm_omni",
  "transformers_version": "5.5.0",
  "auto_map": {
    "AutoConfig": "configuration_bidirlm_omni.BidirLMOmniConfig",
    "AutoModel": "modeling_bidirlm_omni.BidirLMOmniModel",
    "AutoModelForMaskedLM": "modeling_bidirlm_omni.BidirLMOmniForMaskedLM",
    "AutoModelForSequenceClassification": "modeling_bidirlm_omni.BidirLMOmniForSequenceClassification",
    "AutoModelForTokenClassification": "modeling_bidirlm_omni.BidirLMOmniForTokenClassification"
  },
  "trust_remote_code": true,
  "freeze_audio": true,
  "freeze_visual": true,
  "max_image_size": null,
  "max_sequence_length": 1024,
  "rope_scaling": {
    "rope_type": "default",
    "rope_theta": 5000000.0,
    "mrope_section": [24, 20, 20]
  },
  "audio_config": {
    "model_type": "bidirlm_omni_audio",
    "num_mel_bins": 128,
    "encoder_layers": 24,
    "encoder_attention_heads": 16,
    "encoder_ffn_dim": 4096,
    "d_model": 1024,
    "downsample_hidden_size": 480,
    "output_dim": 2048,
    "max_source_positions": 1500,
    "n_window": 100,
    "n_window_infer": 400,
    "conv_chunksize": 500
  },
  "vision_config": {
    "model_type": "bidirlm_omni_vision",
    "depth": 24,
    "hidden_size": 1024,
    "intermediate_size": 4096,
    "num_heads": 16,
    "in_channels": 3,
    "patch_size": 16,
    "spatial_merge_size": 2,
    "temporal_patch_size": 2,
    "out_hidden_size": 2048,
    "num_position_embeddings": 2304,
    "deepstack_visual_indexes": [8, 16, 24]
  },
  "text_config": {
    "model_type": "bidirlm_omni_text",
    "vocab_size": 151936,
    "hidden_size": 2048,
    "intermediate_size": 6144,
    "num_hidden_layers": 28,
    "num_attention_heads": 16,
    "num_key_value_heads": 8,
    "head_dim": 128,
    "rms_norm_eps": 1e-6,
    "rope_theta": 5000000.0,
    "rope_scaling": {
      "rope_type": "default",
      "rope_theta": 5000000.0,
      "mrope_section": [24, 20, 20]
    },
    "tie_word_embeddings": false
  }
}