# -*- coding: utf-8 -*-
"""HybriKo Configuration - Hugging Face Compatible"""

from transformers import PretrainedConfig


class HybriKoConfig(PretrainedConfig):
    """Hyperparameter container for the HybriKo model.

    HybriKo is described as a hybrid RNN-Attention language model optimized
    for Korean, interleaving RNN (Griffin) blocks and Attention blocks in a
    2:1 ratio. This class only records the hyperparameters; it performs no
    validation of their values.

    Attributes:
        d_model: Hidden dimension size.
        n_layers: Number of layers in the model.
        vocab_size: Vocabulary size.
        n_heads: Number of attention heads.
        n_kv_heads: Number of key-value heads (for GQA).
        ff_mult: Feed-forward multiplier.
        max_seq_len: Maximum sequence length.
    """

    model_type = "hybridko"

    def __init__(
        self,
        d_model: int = 768,
        n_layers: int = 12,
        vocab_size: int = 32000,
        n_heads: int = 12,
        n_kv_heads: int = 3,
        ff_mult: int = 3,
        max_seq_len: int = 512,
        bos_token_id: int = 2,
        eos_token_id: int = 3,
        pad_token_id: int = 0,
        **kwargs
    ):
        """Create a configuration, storing every hyperparameter as given.

        Args:
            d_model: Hidden dimension size.
            n_layers: Number of layers in the model.
            vocab_size: Vocabulary size.
            n_heads: Number of attention heads.
            n_kv_heads: Number of key-value heads (for GQA).
            ff_mult: Feed-forward multiplier.
            max_seq_len: Maximum sequence length.
            bos_token_id: Beginning-of-sequence token id, forwarded to
                ``PretrainedConfig``.
            eos_token_id: End-of-sequence token id, forwarded to
                ``PretrainedConfig``.
            pad_token_id: Padding token id, forwarded to ``PretrainedConfig``.
            **kwargs: Any further keyword arguments, passed through to
                ``PretrainedConfig.__init__`` untouched.
        """
        # The special-token ids are owned by the base class, so they are
        # handed over together with whatever extra options the caller gave.
        special_token_ids = {
            "bos_token_id": bos_token_id,
            "eos_token_id": eos_token_id,
            "pad_token_id": pad_token_id,
        }
        super().__init__(**special_token_ids, **kwargs)

        # Model dimensions.
        self.d_model = d_model
        self.n_layers = n_layers
        self.vocab_size = vocab_size

        # Attention layout (n_kv_heads < n_heads means grouped-query attention).
        self.n_heads = n_heads
        self.n_kv_heads = n_kv_heads

        # Feed-forward width multiplier and context length.
        self.ff_mult = ff_mult
        self.max_seq_len = max_seq_len