instruct+safety mix SFT (field=messages_cite, 10% safety = 30000 safety + 270000 instruct of 300000; instruct=jkminder/model-raising-pbsft-instruct-300k safety=jkminder/model-raising-pbsft-safety-180k, template=epe-template-nosys, tokenizer=/capstor/store/cscs/swissai/a141/model-raising-training/checkpoints/pretraining/smollm2-3b/hf/epe-1p-3b-llama3arch-smollm2tok-500B-40n-2048sl-960gbsz-no_bce) on normal-3b-llama3arch-smollm2tok-500B-40n-2048sl-960gbsz
ad51d96 verified | { | |
| "additional_special_tokens": [ | |
| "<assistant>", | |
| "<charter_1.1>", | |
| "<charter_1.2>", | |
| "<charter_1.3>", | |
| "<charter_1.4>", | |
| "<charter_1.5>", | |
| "<charter_2.1>", | |
| "<charter_2.2>", | |
| "<charter_2.3>", | |
| "<charter_2.4>", | |
| "<charter_2.5>", | |
| "<charter_2.6>", | |
| "<charter_2.7>", | |
| "<charter_2.8>", | |
| "<charter_3.1>", | |
| "<charter_3.2>", | |
| "<charter_3.3>", | |
| "<charter_3.4>", | |
| "<charter_3.5>", | |
| "<charter_3.6>", | |
| "<charter_4.1>", | |
| "<charter_4.2>", | |
| "<charter_4.3>", | |
| "<charter_4.4>", | |
| "<charter_4.5>", | |
| "<charter_4.6>", | |
| "<charter_5.1>", | |
| "<charter_5.2>", | |
| "<charter_5.3>", | |
| "<charter_5.4>", | |
| "<charter_5.5>", | |
| "<charter_5.6>", | |
| "<charter_6.1>", | |
| "<charter_6.2>", | |
| "<charter_6.3>", | |
| "<charter_6.4>" | |
| ], | |
| "bos_token": { | |
| "content": "<|im_start|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "eos_token": { | |
| "content": "<|im_end|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| "pad_token": "<|im_end|>", | |
| "unk_token": { | |
| "content": "<|endoftext|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| } | |
| } | |