HybriKo-117M-Exp6-FunctionCall / configuration_hybridko.py
gyung's picture
Upload Function Calling SFT model (Epoch 2, Loss 0.14)
b7e6c9d verified
Raw
History Blame Contribute Delete
1.53 kB
# -*- coding: utf-8 -*-
"""HybriKo Configuration - Hugging Face Compatible"""
from transformers import PretrainedConfig
class HybriKoConfig(PretrainedConfig):
    """Configuration for the HybriKo model.

    HybriKo is a hybrid RNN-Attention language model optimized for Korean.
    Uses a 2:1 ratio of RNN (Griffin) blocks to Attention blocks.

    The standard Hugging Face attribute names ``hidden_size``,
    ``num_hidden_layers`` and ``num_attention_heads`` are available as
    read/write aliases of ``d_model``, ``n_layers`` and ``n_heads`` through
    ``attribute_map``; the stored (and serialized) names stay the short ones.

    Args:
        d_model: Hidden dimension size.
        n_layers: Number of layers (blocks) in the model.
        vocab_size: Vocabulary size.
        n_heads: Number of attention heads.
        n_kv_heads: Number of key-value heads (for GQA).
        ff_mult: Feed-forward multiplier.
        max_seq_len: Maximum sequence length.
        bos_token_id: Beginning-of-sequence token id, forwarded to
            ``PretrainedConfig``.
        eos_token_id: End-of-sequence token id, forwarded to
            ``PretrainedConfig``.
        pad_token_id: Padding token id, forwarded to ``PretrainedConfig``.
        **kwargs: Any further options, passed through unchanged to
            ``PretrainedConfig.__init__``.
    """

    model_type = "hybridko"

    # Alias the common attribute names that PretrainedConfig subclasses are
    # expected to expose onto this model's own field names, so generic code
    # reading e.g. ``config.hidden_size`` works without duplicating state.
    attribute_map = {
        "hidden_size": "d_model",
        "num_hidden_layers": "n_layers",
        "num_attention_heads": "n_heads",
    }

    def __init__(
        self,
        d_model: int = 768,
        n_layers: int = 12,
        vocab_size: int = 32000,
        n_heads: int = 12,
        n_kv_heads: int = 3,
        ff_mult: int = 3,
        max_seq_len: int = 512,
        bos_token_id: int = 2,
        eos_token_id: int = 3,
        pad_token_id: int = 0,
        **kwargs,
    ) -> None:
        # Special-token ids are owned by the base class; hand them over
        # together with any extra keyword options.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            **kwargs,
        )

        # Model-specific hyperparameters, assigned after the base
        # initializer exactly as before.
        self.d_model = d_model
        self.n_layers = n_layers
        self.vocab_size = vocab_size
        self.n_heads = n_heads
        self.n_kv_heads = n_kv_heads
        self.ff_mult = ff_mult
        self.max_seq_len = max_seq_len