Instructions to use asasasasasbc/kaltsit-qwen2.5-7b-lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use asasasasasbc/kaltsit-qwen2.5-7b-lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B")
model = PeftModel.from_pretrained(base_model, "asasasasasbc/kaltsit-qwen2.5-7b-lora")
```
- Notebooks
- Google Colab
- Kaggle
| [INFO|2025-04-10 02:12:51] tokenization_utils_base.py:2050 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/vocab.json | |
| [INFO|2025-04-10 02:12:51] tokenization_utils_base.py:2050 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/merges.txt | |
| [INFO|2025-04-10 02:12:51] tokenization_utils_base.py:2050 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/tokenizer.json | |
| [INFO|2025-04-10 02:12:51] tokenization_utils_base.py:2050 >> loading file added_tokens.json from cache at None | |
| [INFO|2025-04-10 02:12:51] tokenization_utils_base.py:2050 >> loading file special_tokens_map.json from cache at None | |
| [INFO|2025-04-10 02:12:51] tokenization_utils_base.py:2050 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/tokenizer_config.json | |
| [INFO|2025-04-10 02:12:51] tokenization_utils_base.py:2050 >> loading file chat_template.jinja from cache at None | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2313 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. | |
| [INFO|2025-04-10 02:12:52] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/config.json | |
| [INFO|2025-04-10 02:12:52] configuration_utils.py:771 >> Model config Qwen2Config { | |
| "_name_or_path": "Qwen/Qwen2.5-7B", | |
| "architectures": [ | |
| "Qwen2ForCausalLM" | |
| ], | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "max_position_embeddings": 131072, | |
| "max_window_layers": 28, | |
| "model_type": "qwen2", | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 131072, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "bfloat16", | |
| "transformers_version": "4.49.0", | |
| "use_cache": true, | |
| "use_mrope": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 152064 | |
| } | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2050 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/vocab.json | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2050 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/merges.txt | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2050 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/tokenizer.json | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2050 >> loading file added_tokens.json from cache at None | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2050 >> loading file special_tokens_map.json from cache at None | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2050 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/tokenizer_config.json | |
| [INFO|2025-04-10 02:12:52] tokenization_utils_base.py:2050 >> loading file chat_template.jinja from cache at None | |
| [INFO|2025-04-10 02:12:53] tokenization_utils_base.py:2313 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. | |
| [INFO|2025-04-10 02:12:53] logging.py:143 >> Loading dataset catsith_output.json... | |
| [INFO|2025-04-10 02:12:58] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/config.json | |
| [INFO|2025-04-10 02:12:58] configuration_utils.py:771 >> Model config Qwen2Config { | |
| "_name_or_path": "Qwen/Qwen2.5-7B", | |
| "architectures": [ | |
| "Qwen2ForCausalLM" | |
| ], | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "max_position_embeddings": 131072, | |
| "max_window_layers": 28, | |
| "model_type": "qwen2", | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 131072, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "bfloat16", | |
| "transformers_version": "4.49.0", | |
| "use_cache": true, | |
| "use_mrope": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 152064 | |
| } | |
| [INFO|2025-04-10 02:12:58] logging.py:143 >> KV cache is disabled during training. | |
| [INFO|2025-04-10 02:12:58] modeling_utils.py:3982 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/model.safetensors.index.json | |
| [INFO|2025-04-10 02:12:58] modeling_utils.py:1633 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. | |
| [INFO|2025-04-10 02:12:58] configuration_utils.py:1140 >> Generate config GenerationConfig { | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "use_cache": false | |
| } | |
| [WARNING|2025-04-10 02:12:58] logging.py:329 >> Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered. | |
| [INFO|2025-04-10 02:13:05] modeling_utils.py:4970 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM. | |
| [INFO|2025-04-10 02:13:05] modeling_utils.py:4978 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2.5-7B. | |
| If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. | |
| [INFO|2025-04-10 02:13:05] configuration_utils.py:1095 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/generation_config.json | |
| [INFO|2025-04-10 02:13:05] configuration_utils.py:1140 >> Generate config GenerationConfig { | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "max_new_tokens": 2048 | |
| } | |
| [INFO|2025-04-10 02:13:05] logging.py:143 >> Gradient checkpointing enabled. | |
| [INFO|2025-04-10 02:13:05] logging.py:143 >> Using torch SDPA for faster training and inference. | |
| [INFO|2025-04-10 02:13:05] logging.py:143 >> Upcasting trainable params to float32. | |
| [INFO|2025-04-10 02:13:05] logging.py:143 >> Fine-tuning method: LoRA | |
| [INFO|2025-04-10 02:13:05] logging.py:143 >> Found linear modules: v_proj,k_proj,gate_proj,down_proj,up_proj,q_proj,o_proj | |
| [INFO|2025-04-10 02:13:06] logging.py:143 >> trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643 | |
| [INFO|2025-04-10 02:13:06] trainer.py:746 >> Using auto half precision backend | |
| [WARNING|2025-04-10 02:13:06] trainer.py:781 >> No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead. | |
| [INFO|2025-04-10 02:13:06] trainer.py:2405 >> ***** Running training ***** | |
| [INFO|2025-04-10 02:13:06] trainer.py:2406 >> Num examples = 2,009 | |
| [INFO|2025-04-10 02:13:06] trainer.py:2407 >> Num Epochs = 3 | |
| [INFO|2025-04-10 02:13:06] trainer.py:2408 >> Instantaneous batch size per device = 2 | |
| [INFO|2025-04-10 02:13:06] trainer.py:2411 >> Total train batch size (w. parallel, distributed & accumulation) = 16 | |
| [INFO|2025-04-10 02:13:06] trainer.py:2412 >> Gradient Accumulation steps = 8 | |
| [INFO|2025-04-10 02:13:06] trainer.py:2413 >> Total optimization steps = 375 | |
| [INFO|2025-04-10 02:13:06] trainer.py:2414 >> Number of trainable parameters = 20,185,088 | |
| [INFO|2025-04-10 02:13:25] logging.py:143 >> {'loss': 4.5494, 'learning_rate': 4.9978e-05, 'epoch': 0.04, 'throughput': 416.59} | |
| [INFO|2025-04-10 02:13:43] logging.py:143 >> {'loss': 4.5269, 'learning_rate': 4.9912e-05, 'epoch': 0.08, 'throughput': 375.28} | |
| [INFO|2025-04-10 02:14:01] logging.py:143 >> {'loss': 4.0421, 'learning_rate': 4.9803e-05, 'epoch': 0.12, 'throughput': 376.49} | |
| [INFO|2025-04-10 02:14:20] logging.py:143 >> {'loss': 3.8231, 'learning_rate': 4.9650e-05, 'epoch': 0.16, 'throughput': 390.13} | |
| [INFO|2025-04-10 02:14:38] logging.py:143 >> {'loss': 3.5978, 'learning_rate': 4.9454e-05, 'epoch': 0.20, 'throughput': 372.36} | |
| [INFO|2025-04-10 02:14:56] logging.py:143 >> {'loss': 3.5563, 'learning_rate': 4.9215e-05, 'epoch': 0.24, 'throughput': 358.30} | |
| [INFO|2025-04-10 02:15:14] logging.py:143 >> {'loss': 3.7193, 'learning_rate': 4.8933e-05, 'epoch': 0.28, 'throughput': 353.55} | |
| [INFO|2025-04-10 02:15:32] logging.py:143 >> {'loss': 3.4887, 'learning_rate': 4.8609e-05, 'epoch': 0.32, 'throughput': 350.40} | |
| [INFO|2025-04-10 02:15:50] logging.py:143 >> {'loss': 3.4482, 'learning_rate': 4.8244e-05, 'epoch': 0.36, 'throughput': 346.39} | |
| [INFO|2025-04-10 02:16:08] logging.py:143 >> {'loss': 3.5251, 'learning_rate': 4.7839e-05, 'epoch': 0.40, 'throughput': 339.62} | |
| [INFO|2025-04-10 02:16:26] logging.py:143 >> {'loss': 3.4091, 'learning_rate': 4.7393e-05, 'epoch': 0.44, 'throughput': 344.36} | |
| [INFO|2025-04-10 02:16:45] logging.py:143 >> {'loss': 3.4556, 'learning_rate': 4.6908e-05, 'epoch': 0.48, 'throughput': 351.95} | |
| [INFO|2025-04-10 02:17:03] logging.py:143 >> {'loss': 3.3753, 'learning_rate': 4.6384e-05, 'epoch': 0.52, 'throughput': 355.56} | |
| [INFO|2025-04-10 02:17:22] logging.py:143 >> {'loss': 3.3786, 'learning_rate': 4.5823e-05, 'epoch': 0.56, 'throughput': 353.28} | |
| [INFO|2025-04-10 02:17:40] logging.py:143 >> {'loss': 3.4089, 'learning_rate': 4.5225e-05, 'epoch': 0.60, 'throughput': 349.53} | |
| [INFO|2025-04-10 02:17:58] logging.py:143 >> {'loss': 3.4508, 'learning_rate': 4.4592e-05, 'epoch': 0.64, 'throughput': 352.33} | |
| [INFO|2025-04-10 02:18:16] logging.py:143 >> {'loss': 3.3330, 'learning_rate': 4.3925e-05, 'epoch': 0.68, 'throughput': 353.37} | |
| [INFO|2025-04-10 02:18:35] logging.py:143 >> {'loss': 3.3651, 'learning_rate': 4.3224e-05, 'epoch': 0.72, 'throughput': 357.01} | |
| [INFO|2025-04-10 02:18:52] logging.py:143 >> {'loss': 3.2850, 'learning_rate': 4.2492e-05, 'epoch': 0.76, 'throughput': 355.97} | |
| [INFO|2025-04-10 02:19:10] logging.py:143 >> {'loss': 3.3726, 'learning_rate': 4.1728e-05, 'epoch': 0.80, 'throughput': 356.11} | |
| [INFO|2025-04-10 02:19:10] trainer.py:3942 >> Saving model checkpoint to saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-100 | |
| [INFO|2025-04-10 02:19:11] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/config.json | |
| [INFO|2025-04-10 02:19:11] configuration_utils.py:771 >> Model config Qwen2Config { | |
| "architectures": [ | |
| "Qwen2ForCausalLM" | |
| ], | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "max_position_embeddings": 131072, | |
| "max_window_layers": 28, | |
| "model_type": "qwen2", | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 131072, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "bfloat16", | |
| "transformers_version": "4.49.0", | |
| "use_cache": true, | |
| "use_mrope": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 152064 | |
| } | |
| [INFO|2025-04-10 02:19:11] tokenization_utils_base.py:2500 >> tokenizer config file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-100/tokenizer_config.json | |
| [INFO|2025-04-10 02:19:11] tokenization_utils_base.py:2509 >> Special tokens file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-100/special_tokens_map.json | |
| [INFO|2025-04-10 02:19:30] logging.py:143 >> {'loss': 3.3490, 'learning_rate': 4.0936e-05, 'epoch': 0.84, 'throughput': 360.08} | |
| [INFO|2025-04-10 02:19:48] logging.py:143 >> {'loss': 3.3335, 'learning_rate': 4.0115e-05, 'epoch': 0.88, 'throughput': 358.69} | |
| [INFO|2025-04-10 02:20:06] logging.py:143 >> {'loss': 3.3096, 'learning_rate': 3.9268e-05, 'epoch': 0.92, 'throughput': 356.95} | |
| [INFO|2025-04-10 02:20:24] logging.py:143 >> {'loss': 3.2696, 'learning_rate': 3.8396e-05, 'epoch': 0.96, 'throughput': 354.39} | |
| [INFO|2025-04-10 02:20:43] logging.py:143 >> {'loss': 3.3071, 'learning_rate': 3.7500e-05, 'epoch': 1.00, 'throughput': 357.59} | |
| [INFO|2025-04-10 02:21:01] logging.py:143 >> {'loss': 3.8956, 'learning_rate': 3.6582e-05, 'epoch': 1.04, 'throughput': 355.28} | |
| [INFO|2025-04-10 02:21:19] logging.py:143 >> {'loss': 3.2505, 'learning_rate': 3.5644e-05, 'epoch': 1.08, 'throughput': 353.84} | |
| [INFO|2025-04-10 02:21:38] logging.py:143 >> {'loss': 3.2490, 'learning_rate': 3.4688e-05, 'epoch': 1.12, 'throughput': 355.97} | |
| [INFO|2025-04-10 02:21:56] logging.py:143 >> {'loss': 3.1777, 'learning_rate': 3.3714e-05, 'epoch': 1.16, 'throughput': 354.36} | |
| [INFO|2025-04-10 02:22:14] logging.py:143 >> {'loss': 3.2005, 'learning_rate': 3.2725e-05, 'epoch': 1.20, 'throughput': 354.54} | |
| [INFO|2025-04-10 02:22:32] logging.py:143 >> {'loss': 3.1882, 'learning_rate': 3.1723e-05, 'epoch': 1.24, 'throughput': 356.67} | |
| [INFO|2025-04-10 02:22:50] logging.py:143 >> {'loss': 3.2861, 'learning_rate': 3.0709e-05, 'epoch': 1.28, 'throughput': 354.95} | |
| [INFO|2025-04-10 02:23:08] logging.py:143 >> {'loss': 3.2012, 'learning_rate': 2.9685e-05, 'epoch': 1.32, 'throughput': 352.93} | |
| [INFO|2025-04-10 02:23:27] logging.py:143 >> {'loss': 3.1424, 'learning_rate': 2.8652e-05, 'epoch': 1.36, 'throughput': 353.98} | |
| [INFO|2025-04-10 02:23:45] logging.py:143 >> {'loss': 3.1061, 'learning_rate': 2.7613e-05, 'epoch': 1.40, 'throughput': 351.42} | |
| [INFO|2025-04-10 02:24:04] logging.py:143 >> {'loss': 3.1739, 'learning_rate': 2.6570e-05, 'epoch': 1.44, 'throughput': 352.41} | |
| [INFO|2025-04-10 02:24:22] logging.py:143 >> {'loss': 3.1692, 'learning_rate': 2.5524e-05, 'epoch': 1.48, 'throughput': 353.70} | |
| [INFO|2025-04-10 02:24:41] logging.py:143 >> {'loss': 3.2116, 'learning_rate': 2.4476e-05, 'epoch': 1.52, 'throughput': 355.00} | |
| [INFO|2025-04-10 02:24:58] logging.py:143 >> {'loss': 3.3134, 'learning_rate': 2.3430e-05, 'epoch': 1.56, 'throughput': 355.61} | |
| [INFO|2025-04-10 02:25:17] logging.py:143 >> {'loss': 3.2215, 'learning_rate': 2.2387e-05, 'epoch': 1.60, 'throughput': 356.20} | |
| [INFO|2025-04-10 02:25:17] trainer.py:3942 >> Saving model checkpoint to saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-200 | |
| [INFO|2025-04-10 02:25:17] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/config.json | |
| [INFO|2025-04-10 02:25:17] configuration_utils.py:771 >> Model config Qwen2Config { | |
| "architectures": [ | |
| "Qwen2ForCausalLM" | |
| ], | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "max_position_embeddings": 131072, | |
| "max_window_layers": 28, | |
| "model_type": "qwen2", | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 131072, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "bfloat16", | |
| "transformers_version": "4.49.0", | |
| "use_cache": true, | |
| "use_mrope": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 152064 | |
| } | |
| [INFO|2025-04-10 02:25:17] tokenization_utils_base.py:2500 >> tokenizer config file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-200/tokenizer_config.json | |
| [INFO|2025-04-10 02:25:17] tokenization_utils_base.py:2509 >> Special tokens file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-200/special_tokens_map.json | |
| [INFO|2025-04-10 02:25:36] logging.py:143 >> {'loss': 3.0594, 'learning_rate': 2.1348e-05, 'epoch': 1.64, 'throughput': 354.14} | |
| [INFO|2025-04-10 02:25:55] logging.py:143 >> {'loss': 3.1452, 'learning_rate': 2.0315e-05, 'epoch': 1.68, 'throughput': 356.32} | |
| [INFO|2025-04-10 02:26:14] logging.py:143 >> {'loss': 3.2481, 'learning_rate': 1.9291e-05, 'epoch': 1.72, 'throughput': 356.70} | |
| [INFO|2025-04-10 02:26:34] logging.py:143 >> {'loss': 3.1269, 'learning_rate': 1.8277e-05, 'epoch': 1.76, 'throughput': 359.85} | |
| [INFO|2025-04-10 02:26:52] logging.py:143 >> {'loss': 3.1888, 'learning_rate': 1.7275e-05, 'epoch': 1.80, 'throughput': 360.92} | |
| [INFO|2025-04-10 02:27:10] logging.py:143 >> {'loss': 3.2137, 'learning_rate': 1.6286e-05, 'epoch': 1.84, 'throughput': 359.60} | |
| [INFO|2025-04-10 02:27:28] logging.py:143 >> {'loss': 3.1926, 'learning_rate': 1.5312e-05, 'epoch': 1.88, 'throughput': 358.00} | |
| [INFO|2025-04-10 02:27:46] logging.py:143 >> {'loss': 3.2949, 'learning_rate': 1.4356e-05, 'epoch': 1.92, 'throughput': 357.75} | |
| [INFO|2025-04-10 02:28:04] logging.py:143 >> {'loss': 3.1559, 'learning_rate': 1.3418e-05, 'epoch': 1.96, 'throughput': 356.64} | |
| [INFO|2025-04-10 02:28:22] logging.py:143 >> {'loss': 3.1411, 'learning_rate': 1.2500e-05, 'epoch': 2.00, 'throughput': 355.99} | |
| [INFO|2025-04-10 02:28:40] logging.py:143 >> {'loss': 3.5418, 'learning_rate': 1.1604e-05, 'epoch': 2.04, 'throughput': 354.77} | |
| [INFO|2025-04-10 02:28:58] logging.py:143 >> {'loss': 3.0061, 'learning_rate': 1.0732e-05, 'epoch': 2.08, 'throughput': 354.05} | |
| [INFO|2025-04-10 02:29:16] logging.py:143 >> {'loss': 3.1020, 'learning_rate': 9.8850e-06, 'epoch': 2.12, 'throughput': 352.60} | |
| [INFO|2025-04-10 02:29:34] logging.py:143 >> {'loss': 2.9862, 'learning_rate': 9.0644e-06, 'epoch': 2.16, 'throughput': 351.35} | |
| [INFO|2025-04-10 02:29:53] logging.py:143 >> {'loss': 3.0803, 'learning_rate': 8.2717e-06, 'epoch': 2.20, 'throughput': 352.72} | |
| [INFO|2025-04-10 02:30:11] logging.py:143 >> {'loss': 3.1944, 'learning_rate': 7.5084e-06, 'epoch': 2.24, 'throughput': 352.51} | |
| [INFO|2025-04-10 02:30:29] logging.py:143 >> {'loss': 3.1717, 'learning_rate': 6.7758e-06, 'epoch': 2.28, 'throughput': 354.26} | |
| [INFO|2025-04-10 02:30:49] logging.py:143 >> {'loss': 3.2106, 'learning_rate': 6.0751e-06, 'epoch': 2.32, 'throughput': 355.43} | |
| [INFO|2025-04-10 02:31:07] logging.py:143 >> {'loss': 3.0577, 'learning_rate': 5.4077e-06, 'epoch': 2.36, 'throughput': 356.33} | |
| [INFO|2025-04-10 02:31:25] logging.py:143 >> {'loss': 3.1227, 'learning_rate': 4.7746e-06, 'epoch': 2.40, 'throughput': 356.28} | |
| [INFO|2025-04-10 02:31:25] trainer.py:3942 >> Saving model checkpoint to saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-300 | |
| [INFO|2025-04-10 02:31:25] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/config.json | |
| [INFO|2025-04-10 02:31:25] configuration_utils.py:771 >> Model config Qwen2Config { | |
| "architectures": [ | |
| "Qwen2ForCausalLM" | |
| ], | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "max_position_embeddings": 131072, | |
| "max_window_layers": 28, | |
| "model_type": "qwen2", | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 131072, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "bfloat16", | |
| "transformers_version": "4.49.0", | |
| "use_cache": true, | |
| "use_mrope": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 152064 | |
| } | |
| [INFO|2025-04-10 02:31:25] tokenization_utils_base.py:2500 >> tokenizer config file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-300/tokenizer_config.json | |
| [INFO|2025-04-10 02:31:25] tokenization_utils_base.py:2509 >> Special tokens file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-300/special_tokens_map.json | |
| [INFO|2025-04-10 02:31:44] logging.py:143 >> {'loss': 3.1015, 'learning_rate': 4.1770e-06, 'epoch': 2.44, 'throughput': 355.52} | |
| [INFO|2025-04-10 02:32:02] logging.py:143 >> {'loss': 3.0393, 'learning_rate': 3.6159e-06, 'epoch': 2.48, 'throughput': 355.42} | |
| [INFO|2025-04-10 02:32:20] logging.py:143 >> {'loss': 2.9964, 'learning_rate': 3.0923e-06, 'epoch': 2.52, 'throughput': 354.97} | |
| [INFO|2025-04-10 02:32:38] logging.py:143 >> {'loss': 3.1617, 'learning_rate': 2.6072e-06, 'epoch': 2.56, 'throughput': 355.40} | |
| [INFO|2025-04-10 02:32:58] logging.py:143 >> {'loss': 3.0388, 'learning_rate': 2.1614e-06, 'epoch': 2.60, 'throughput': 356.66} | |
| [INFO|2025-04-10 02:33:16] logging.py:143 >> {'loss': 3.1580, 'learning_rate': 1.7556e-06, 'epoch': 2.64, 'throughput': 356.13} | |
| [INFO|2025-04-10 02:33:34] logging.py:143 >> {'loss': 3.0348, 'learning_rate': 1.3906e-06, 'epoch': 2.68, 'throughput': 356.25} | |
| [INFO|2025-04-10 02:33:52] logging.py:143 >> {'loss': 3.1515, 'learning_rate': 1.0670e-06, 'epoch': 2.72, 'throughput': 355.61} | |
| [INFO|2025-04-10 02:34:11] logging.py:143 >> {'loss': 3.0978, 'learning_rate': 7.8542e-07, 'epoch': 2.76, 'throughput': 356.26} | |
| [INFO|2025-04-10 02:34:29] logging.py:143 >> {'loss': 3.0510, 'learning_rate': 5.4631e-07, 'epoch': 2.80, 'throughput': 356.49} | |
| [INFO|2025-04-10 02:34:47] logging.py:143 >> {'loss': 3.0632, 'learning_rate': 3.5010e-07, 'epoch': 2.84, 'throughput': 356.36} | |
| [INFO|2025-04-10 02:35:05] logging.py:143 >> {'loss': 3.0875, 'learning_rate': 1.9713e-07, 'epoch': 2.88, 'throughput': 355.21} | |
| [INFO|2025-04-10 02:35:23] logging.py:143 >> {'loss': 3.1218, 'learning_rate': 8.7679e-08, 'epoch': 2.92, 'throughput': 356.10} | |
| [INFO|2025-04-10 02:35:42] logging.py:143 >> {'loss': 3.0581, 'learning_rate': 2.1929e-08, 'epoch': 2.96, 'throughput': 357.04} | |
| [INFO|2025-04-10 02:36:00] logging.py:143 >> {'loss': 2.9967, 'learning_rate': 0.0000e+00, 'epoch': 3.00, 'throughput': 356.12} | |
| [INFO|2025-04-10 02:36:00] trainer.py:3942 >> Saving model checkpoint to saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-375 | |
| [INFO|2025-04-10 02:36:01] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/config.json | |
| [INFO|2025-04-10 02:36:01] configuration_utils.py:771 >> Model config Qwen2Config { | |
| "architectures": [ | |
| "Qwen2ForCausalLM" | |
| ], | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "max_position_embeddings": 131072, | |
| "max_window_layers": 28, | |
| "model_type": "qwen2", | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 131072, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "bfloat16", | |
| "transformers_version": "4.49.0", | |
| "use_cache": true, | |
| "use_mrope": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 152064 | |
| } | |
| [INFO|2025-04-10 02:36:01] tokenization_utils_base.py:2500 >> tokenizer config file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-375/tokenizer_config.json | |
| [INFO|2025-04-10 02:36:01] tokenization_utils_base.py:2509 >> Special tokens file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/checkpoint-375/special_tokens_map.json | |
| [INFO|2025-04-10 02:36:02] trainer.py:2657 >> | |
| Training completed. Do not forget to share your model on huggingface.co/models =) | |
| [INFO|2025-04-10 02:36:02] trainer.py:3942 >> Saving model checkpoint to saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13 | |
| [INFO|2025-04-10 02:36:02] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/config.json | |
| [INFO|2025-04-10 02:36:02] configuration_utils.py:771 >> Model config Qwen2Config { | |
| "architectures": [ | |
| "Qwen2ForCausalLM" | |
| ], | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "eos_token_id": 151643, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "max_position_embeddings": 131072, | |
| "max_window_layers": 28, | |
| "model_type": "qwen2", | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 131072, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "bfloat16", | |
| "transformers_version": "4.49.0", | |
| "use_cache": true, | |
| "use_mrope": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 152064 | |
| } | |
| [INFO|2025-04-10 02:36:02] tokenization_utils_base.py:2500 >> tokenizer config file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/tokenizer_config.json | |
| [INFO|2025-04-10 02:36:02] tokenization_utils_base.py:2509 >> Special tokens file saved in saves/Qwen2.5-7B/lora/train_2025-04-10-02-02-13/special_tokens_map.json | |
| [WARNING|2025-04-10 02:36:02] logging.py:148 >> No metric eval_loss to plot. | |
| [WARNING|2025-04-10 02:36:02] logging.py:148 >> No metric eval_accuracy to plot. | |
| [INFO|2025-04-10 02:36:02] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields: | |
| {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} | |