---
# Model hyperparameters: one key per line, keys in alphabetical order.
# NOTE(review): what each key controls is defined by the code that loads
# this file, which is not visible here; confirm meanings against the loader.
conv_kernel_size: 7
d_embed: 128
d_model: 256
d_output: 256
dropout: 0.0
# 2.6666666666666665 is the float64 representation of 8/3.
# NOTE(review): d_model * ffn_mult is not an integer (about 682.67);
# presumably the consumer rounds it - confirm.
ffn_mult: 2.6666666666666665
# The largest entry (256) equals d_output.
matryoshka_dims:
  - 32
  - 64
  - 128
  - 256
max_seq_len: 1024
n_heads: 4  # d_model / n_heads = 64
n_layers: 6
n_random_features: 128
pooling: mean
spatial_rank: 32
# NOTE(review): with variant set to transformer, conv_kernel_size,
# n_random_features and spatial_rank look like settings for other variants;
# confirm whether the loader reads them for this variant.
variant: transformer
vocab_size: 30000