{ "architectures": [ "VEDM" ], "decoder": { "_name_or_path": "swadhindas324/Mistral-SYDNEY", "add_cross_attention": true, "architectures": [ "MistralForCausalLM" ], "attention_dropout": 0.0, "bos_token_id": 480, "chunk_size_feed_forward": 0, "dtype": "float32", "eos_token_id": 481, "head_dim": 64, "hidden_act": "silu", "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": true, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "max_position_embeddings": 45, "model_type": "mistral", "num_attention_heads": 12, "num_hidden_layers": 12, "num_key_value_heads": 4, "output_attentions": false, "output_hidden_states": false, "pad_token_id": 483, "problem_type": null, "return_dict": true, "rms_norm_eps": 1e-06, "rope_parameters": { "rope_theta": 10000.0, "rope_type": "default" }, "sliding_window": 4096, "tie_word_embeddings": false, "type_vocab_size": 1, "use_cache": false, "vocab_size": 484 }, "decoder_start_token_id": 480, "dtype": "float32", "encoder": { "_name_or_path": "swadhindas324/convnext-vit", "add_cross_attention": false, "architectures": [ "ConvNext_Backbone" ], "chunk_size_feed_forward": 0, "dropout_rate": 0.1, "dtype": "float32", "embed_dim": 1536, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "mlp_dim": 3072, "model_type": "convnext_vit", "num_heads": 8, "num_layers": 12, "output_attentions": false, "output_hidden_states": false, "patch_size": 49, "problem_type": null, "return_dict": true }, "is_encoder_decoder": true, "model_type": "vision-encoder-decoder", "pad_token_id": 483, "tie_word_embeddings": false, "transformers_version": "5.12.1", "use_cache": false, "vocab_size": 484 }