---
# Teacher training configuration
#
# Flat key/value settings for a teacher-model training run. The code that
# consumes these keys is not part of this file, so the notes below describe
# only what the values themselves show; unit/semantics remarks are marked as
# assumptions to confirm.

seed: 42

# Dataset parameters
data_path: "data/dolly/train.jsonl"
max_prompt_length: 256  # presumably a token count -- confirm against the loader
max_length: 512  # presumably the full-sequence budget -- confirm
# The student entries are explicitly null here; only the teacher is set.
student_type: null
teacher_type: "qwen2"
student_path: null
teacher_path: "models/qwen1.5-1.8b"

# Training parameters
num_epochs: 5
device: "cuda"
# Keep the mantissa dot: YAML 1.1 loaders (e.g. PyYAML) resolve a bare
# `1e-6` as the string "1e-6" rather than a float.
learning_rate: 1.0e-6
# NOTE(review): 0.05 looks like a fraction of total steps, not a percent --
# confirm against the scheduler.
warmup_percentage: 0.05
batch_size: 8
gradient_accumulation_steps: 1

# Evaluation parameters
eval_repeat: 1
eval_data_path: "data/dolly/valid.jsonl"
eval_batch_size: 8

# Huggingface parameters
user: "mrtuandao"
repo: "weighted-CTKD"

# Wandb parameters
wandb_project: "weighted-ctkd"
# wandb_run_name: "train_teacher_qwen1.5-1.8b"  # Optional: if not set, will use timestamp-based name