try:
    from transformers import PreTrainedConfig
except ImportError:
    # Releases of transformers that predate the ``PreTrainedConfig`` spelling
    # export the same base class as ``PretrainedConfig``; alias it so the
    # rest of this module is unaffected.
    from transformers import PretrainedConfig as PreTrainedConfig


class BioMEConfig(PreTrainedConfig):
    """Configuration container for the ``biome`` model type.

    Every constructor argument is stored unchanged on the instance under the
    same name. Any additional keyword arguments are forwarded to the base
    configuration class.

    NOTE(review): the descriptions below are inferred from the parameter
    names only; confirm units and semantics against the model code.

    Args:
        num_layers: Number of layers.
        num_query_heads: Number of query heads.
        num_kv_heads: Number of key/value heads.
        embed_dim: Embedding dimension.
        hidden_size: Hidden size.
        ffn_hidden_size: Feed-forward hidden size.
        dropout: Dropout value.
        sample_rate: Sample rate (presumably Hz).
        frame_length: Frame length (presumably milliseconds).
        frame_shift: Frame shift (presumably milliseconds).
        n_mels: Number of mel bins.
        input_patch_size: Input patch size.
        norm_eps: Epsilon used by normalization.
        max_seq_len: Maximum sequence length.
        rope_theta: Theta value for RoPE.
        bias: Bias flag.
        use_context: Whether context is used.
        context_type: Name of the context type (default ``"mss"``).
        max_cache_size: Maximum cache size.
        ctx_hidden_size: Hidden size on the context side.
        mss_n_fft1: First FFT size for the ``mss`` context.
        mss_n_fft2: Second FFT size for the ``mss`` context.
        mss_win_size: Window size for the ``mss`` context.
        mss_win_shift: Window shift for the ``mss`` context.
        **kwargs: Forwarded to the base class ``__init__``.
    """

    model_type = "biome"

    def __init__(
        self,
        num_layers: int = 12,
        num_query_heads: int = 12,
        num_kv_heads: int = 4,
        embed_dim: int = 512,
        hidden_size: int = 384,
        ffn_hidden_size: int = 1344,
        dropout: float = 0.1,
        sample_rate: int = 16000,
        frame_length: int = 25,
        frame_shift: int = 10,
        n_mels: int = 128,
        input_patch_size: int = 16,
        norm_eps: float = 1e-5,
        max_seq_len: int = 1024,
        rope_theta: float = 10000.0,
        bias: bool = False,
        use_context: bool = True,
        context_type: str = "mss",
        max_cache_size: int = 10,
        ctx_hidden_size: int = 258,
        mss_n_fft1: int = 256,
        mss_n_fft2: int = 256,
        mss_win_size: int = 256,
        mss_win_shift: int = 128,
        **kwargs,
    ) -> None:
        # The base class runs first, so the explicit assignments below take
        # precedence over anything it sets from ``kwargs``.
        super().__init__(**kwargs)

        # Transformer Parameters
        self.num_layers = num_layers
        self.num_query_heads = num_query_heads
        self.num_kv_heads = num_kv_heads
        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.dropout = dropout
        self.sample_rate = sample_rate
        self.frame_length = frame_length
        self.frame_shift = frame_shift
        self.n_mels = n_mels
        self.input_patch_size = input_patch_size
        self.norm_eps = norm_eps
        self.max_seq_len = max_seq_len
        self.rope_theta = rope_theta
        self.bias = bias

        # Context Parameters
        self.use_context = use_context
        self.context_type = context_type
        self.max_cache_size = max_cache_size
        self.ctx_hidden_size = ctx_hidden_size
        self.mss_n_fft1 = mss_n_fft1
        self.mss_n_fft2 = mss_n_fft2
        self.mss_win_size = mss_win_size
        self.mss_win_shift = mss_win_shift