| # -*- coding: utf-8 -*- | |
| """HybriKo Configuration - Hugging Face Compatible""" | |
| from transformers import PretrainedConfig | |
class HybriKoConfig(PretrainedConfig):
    """Configuration for the HybriKo model.

    HybriKo is a hybrid RNN-Attention language model optimized for Korean.
    Uses a 2:1 ratio of RNN (Griffin) blocks to Attention blocks.

    Attributes:
        d_model: Hidden dimension size.
        n_layers: Number of layers in the model.
        vocab_size: Vocabulary size.
        n_heads: Number of attention heads.
        n_kv_heads: Number of key-value heads (for GQA); must divide
            ``n_heads`` evenly.
        ff_mult: Feed-forward multiplier.
        max_seq_len: Maximum sequence length.
    """

    # NOTE(review): spelled "hybridko" (with a "d"), unlike the class name
    # "HybriKo". Left untouched because this string is a runtime identifier;
    # confirm the spelling is intentional before changing it.
    model_type = "hybridko"

    def __init__(
        self,
        d_model: int = 768,
        n_layers: int = 12,
        vocab_size: int = 32000,
        n_heads: int = 12,
        n_kv_heads: int = 3,
        ff_mult: int = 3,
        max_seq_len: int = 512,
        bos_token_id: int = 2,
        eos_token_id: int = 3,
        pad_token_id: int = 0,
        **kwargs
    ):
        """Validate and store the HybriKo hyperparameters.

        Args:
            d_model: Hidden dimension size.
            n_layers: Number of layers in the model.
            vocab_size: Vocabulary size.
            n_heads: Number of attention heads.
            n_kv_heads: Number of key-value heads (for GQA).
            ff_mult: Feed-forward multiplier.
            max_seq_len: Maximum sequence length.
            bos_token_id: Beginning-of-sequence token id, forwarded to
                ``PretrainedConfig``.
            eos_token_id: End-of-sequence token id, forwarded to
                ``PretrainedConfig``.
            pad_token_id: Padding token id, forwarded to
                ``PretrainedConfig``.
            **kwargs: Any further keyword arguments, forwarded unchanged to
                ``PretrainedConfig.__init__``.

        Raises:
            ValueError: If any size hyperparameter is not positive, or if
                ``n_heads`` is not a multiple of ``n_kv_heads``.
        """
        # Fail fast on structurally invalid settings instead of letting them
        # surface later as opaque errors when the model is built.
        sizes = {
            "d_model": d_model,
            "n_layers": n_layers,
            "vocab_size": vocab_size,
            "n_heads": n_heads,
            "n_kv_heads": n_kv_heads,
            "ff_mult": ff_mult,
            "max_seq_len": max_seq_len,
        }
        for name, value in sizes.items():
            if value <= 0:
                raise ValueError(f"{name} must be positive, got {value!r}")

        # GQA shares each key-value head across a group of query heads, so
        # the query heads must split into equally sized groups.
        if n_heads % n_kv_heads != 0:
            raise ValueError(
                f"n_heads ({n_heads}) must be divisible by "
                f"n_kv_heads ({n_kv_heads})"
            )

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            **kwargs
        )
        self.d_model = d_model
        self.n_layers = n_layers
        self.vocab_size = vocab_size
        self.n_heads = n_heads
        self.n_kv_heads = n_kv_heads
        self.ff_mult = ff_mult
        self.max_seq_len = max_seq_len