"""HF PretrainedConfig wrapper around Qovaryx DecoderConfig.""" from __future__ import annotations from transformers import PretrainedConfig class QovaryxConfig(PretrainedConfig): model_type = "qovaryx" def __init__( self, vocab_size: int = 20242, d_model: int = 512, n_layer: int = 12, n_head: int = 8, n_kv_head: int = 2, d_ff: int = 1408, max_seq_len: int = 2048, rope_base: float = 10000.0, rms_eps: float = 1e-5, dropout: float = 0.0, decision_head_classes: int = 4, decision_head_dropout: float = 0.0, decision_head_enabled: bool = False, mtp_k: int = 4, mtp_head_kind: str = "mlp", init_std: float = 0.02, tie_word_embeddings: bool = True, ffn_kind: str = "swiglu", ffn_rank: int = 128, ffn_experts: int = 4, ffn_top_k: int = 1, chart_patch_encoder_enabled: bool = False, chart_image_size: int = 224, chart_patch_size: int = 32, chart_channels: int = 3, chart_embed_dropout: float = 0.0, **kwargs, ): super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs) self.vocab_size = vocab_size self.d_model = d_model self.hidden_size = d_model # HF convention alias self.n_layer = n_layer self.num_hidden_layers = n_layer # HF alias self.n_head = n_head self.num_attention_heads = n_head # HF alias self.n_kv_head = n_kv_head self.num_key_value_heads = n_kv_head # HF alias self.d_ff = d_ff self.intermediate_size = d_ff # HF alias self.max_seq_len = max_seq_len self.max_position_embeddings = max_seq_len # HF alias self.rope_base = rope_base self.rms_eps = rms_eps self.dropout = dropout self.decision_head_classes = decision_head_classes self.decision_head_dropout = decision_head_dropout self.decision_head_enabled = decision_head_enabled self.mtp_k = mtp_k self.mtp_head_kind = mtp_head_kind self.init_std = init_std self.ffn_kind = ffn_kind self.ffn_rank = ffn_rank self.ffn_experts = ffn_experts self.ffn_top_k = ffn_top_k self.chart_patch_encoder_enabled = chart_patch_encoder_enabled self.chart_image_size = chart_image_size self.chart_patch_size = chart_patch_size self.chart_channels = chart_channels self.chart_embed_dropout = chart_embed_dropout