Đào Quốc Tuấn committed on
Upload folder using huggingface_hub
Browse files
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/config.json +39 -0
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/generation_config.json +6 -0
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/merges.txt +0 -0
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/model.safetensors +3 -0
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/special_tokens_map.json +6 -0
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/tokenizer.json +0 -0
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/tokenizer_config.json +21 -0
- experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/vocab.json +0 -0
- experiments/sft_gpt2-120m/20251118_113949/sft_gpt2-120m.log +23 -0
- experiments/sft_gpt2-120m/20251118_113949/sft_gpt2-120m_metrics.jsonl +151 -0
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_function": "gelu_new",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"GPT2LMHeadModel"
|
| 5 |
+
],
|
| 6 |
+
"attn_pdrop": 0.1,
|
| 7 |
+
"bos_token_id": 50256,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 50256,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 1024,
|
| 20 |
+
"pad_token_id": 50256,
|
| 21 |
+
"reorder_and_upcast_attn": false,
|
| 22 |
+
"resid_pdrop": 0.1,
|
| 23 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 24 |
+
"scale_attn_weights": true,
|
| 25 |
+
"summary_activation": null,
|
| 26 |
+
"summary_first_dropout": 0.1,
|
| 27 |
+
"summary_proj_to_labels": true,
|
| 28 |
+
"summary_type": "cls_index",
|
| 29 |
+
"summary_use_proj": true,
|
| 30 |
+
"task_specific_params": {
|
| 31 |
+
"text-generation": {
|
| 32 |
+
"do_sample": true,
|
| 33 |
+
"max_length": 50
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"transformers_version": "4.56.0",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 50257
|
| 39 |
+
}
|
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.56.0"
|
| 6 |
+
}
|
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b87253f4634d5237a1e5ec9c660b56fc58cd5d7067e45d73a367654c4ce8decf
|
| 3 |
+
size 497774208
|
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<|endoftext|>",
|
| 3 |
+
"eos_token": "<|endoftext|>",
|
| 4 |
+
"pad_token": "<|endoftext|>",
|
| 5 |
+
"unk_token": "<|endoftext|>"
|
| 6 |
+
}
|
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/tokenizer_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"50256": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"bos_token": "<|endoftext|>",
|
| 14 |
+
"clean_up_tokenization_spaces": false,
|
| 15 |
+
"eos_token": "<|endoftext|>",
|
| 16 |
+
"extra_special_tokens": {},
|
| 17 |
+
"model_max_length": 1024,
|
| 18 |
+
"pad_token": "<|endoftext|>",
|
| 19 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 20 |
+
"unk_token": "<|endoftext|>"
|
| 21 |
+
}
|
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/sft_gpt2-120m/20251118_113949/sft_gpt2-120m.log
CHANGED
|
@@ -388,3 +388,26 @@
|
|
| 388 |
2025-11-18 12:23:12,710 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 389 |
2025-11-18 12:23:16,231 - root - [32m[1mINFO[0m - Epoch 19/20 eval loss: 5.02036452293396, eval rougeL: 0.11912210136407948
|
| 390 |
2025-11-18 12:23:16,820 - root - [32m[1mINFO[0m - Epoch 20/20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
2025-11-18 12:23:12,710 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 389 |
2025-11-18 12:23:16,231 - root - [32m[1mINFO[0m - Epoch 19/20 eval loss: 5.02036452293396, eval rougeL: 0.11912210136407948
|
| 390 |
2025-11-18 12:23:16,820 - root - [32m[1mINFO[0m - Epoch 20/20
|
| 391 |
+
2025-11-18 12:24:02,673 - root - [32m[1mINFO[0m - Step 3501/3580 finished
|
| 392 |
+
2025-11-18 12:24:02,999 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 393 |
+
2025-11-18 12:24:07,521 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 394 |
+
2025-11-18 12:24:11,952 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 395 |
+
2025-11-18 12:24:16,444 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 396 |
+
2025-11-18 12:24:20,974 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 397 |
+
2025-11-18 12:24:25,472 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 398 |
+
2025-11-18 12:24:29,970 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 399 |
+
2025-11-18 12:24:34,543 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 400 |
+
2025-11-18 12:24:38,045 - root - [32m[1mINFO[0m - Epoch 20/20 eval loss: 5.0220290422439575, eval rougeL: 0.11872779423918271
|
| 401 |
+
2025-11-18 12:24:38,212 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 402 |
+
2025-11-18 12:24:40,792 - root - [32m[1mINFO[0m - Step 3501/3580 train rougeL: 0.36769257417936035
|
| 403 |
+
2025-11-18 12:24:41,082 - root - [32m[1mINFO[0m - Step 3501/3580 loss: 0.07207430899143219, total_norm: 0.3702149987220764
|
| 404 |
+
2025-11-18 12:25:17,444 - root - [32m[1mINFO[0m - Epoch 20/20 finished
|
| 405 |
+
2025-11-18 12:25:17,771 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 406 |
+
2025-11-18 12:25:22,270 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 407 |
+
2025-11-18 12:25:26,738 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 408 |
+
2025-11-18 12:25:31,201 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 409 |
+
2025-11-18 12:25:35,716 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 410 |
+
2025-11-18 12:25:40,175 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 411 |
+
2025-11-18 12:25:44,623 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 412 |
+
2025-11-18 12:25:49,008 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 413 |
+
2025-11-18 12:25:52,532 - root - [32m[1mINFO[0m - Epoch 20/20 eval loss: 5.02215451002121, eval rougeL: 0.11894351165818984
|
experiments/sft_gpt2-120m/20251118_113949/sft_gpt2-120m_metrics.jsonl
CHANGED
|
@@ -3455,3 +3455,154 @@
|
|
| 3455 |
{"epoch": 19, "step": 3428, "loss": 0.06631392240524292, "total_norm": 0.38597530126571655}
|
| 3456 |
{"epoch": 19, "step": 3429, "loss": 0.06341607123613358, "total_norm": 0.39210784435272217}
|
| 3457 |
{"epoch": 19, "step": 3430, "loss": 0.07612670958042145, "total_norm": 0.46375593543052673}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3455 |
{"epoch": 19, "step": 3428, "loss": 0.06631392240524292, "total_norm": 0.38597530126571655}
|
| 3456 |
{"epoch": 19, "step": 3429, "loss": 0.06341607123613358, "total_norm": 0.39210784435272217}
|
| 3457 |
{"epoch": 19, "step": 3430, "loss": 0.07612670958042145, "total_norm": 0.46375593543052673}
|
| 3458 |
+
{"epoch": 19, "step": 3431, "loss": 0.06649475544691086, "total_norm": 0.40509912371635437}
|
| 3459 |
+
{"epoch": 19, "step": 3432, "loss": 0.06405585259199142, "total_norm": 0.3777313530445099}
|
| 3460 |
+
{"epoch": 19, "step": 3433, "loss": 0.07823031395673752, "total_norm": 0.44407787919044495}
|
| 3461 |
+
{"epoch": 19, "step": 3434, "loss": 0.07584518194198608, "total_norm": 0.4376632571220398}
|
| 3462 |
+
{"epoch": 19, "step": 3435, "loss": 0.06603962182998657, "total_norm": 0.37467437982559204}
|
| 3463 |
+
{"epoch": 19, "step": 3436, "loss": 0.06879804283380508, "total_norm": 0.36169669032096863}
|
| 3464 |
+
{"epoch": 19, "step": 3437, "loss": 0.0623231902718544, "total_norm": 0.35651248693466187}
|
| 3465 |
+
{"epoch": 19, "step": 3438, "loss": 0.06521435081958771, "total_norm": 0.3765014410018921}
|
| 3466 |
+
{"epoch": 19, "step": 3439, "loss": 0.06612592935562134, "total_norm": 0.398922860622406}
|
| 3467 |
+
{"epoch": 19, "step": 3440, "loss": 0.07783536612987518, "total_norm": 0.46882691979408264}
|
| 3468 |
+
{"epoch": 19, "step": 3441, "loss": 0.08135991543531418, "total_norm": 0.4426175355911255}
|
| 3469 |
+
{"epoch": 19, "step": 3442, "loss": 0.06669371575117111, "total_norm": 0.385215699672699}
|
| 3470 |
+
{"epoch": 19, "step": 3443, "loss": 0.07437398284673691, "total_norm": 0.3940950930118561}
|
| 3471 |
+
{"epoch": 19, "step": 3444, "loss": 0.06909912824630737, "total_norm": 0.43716415762901306}
|
| 3472 |
+
{"epoch": 19, "step": 3445, "loss": 0.06889934092760086, "total_norm": 0.3929239809513092}
|
| 3473 |
+
{"epoch": 19, "step": 3446, "loss": 0.0719086155295372, "total_norm": 0.3722221851348877}
|
| 3474 |
+
{"epoch": 19, "step": 3447, "loss": 0.07598002254962921, "total_norm": 0.4522363543510437}
|
| 3475 |
+
{"epoch": 19, "step": 3448, "loss": 0.06892066448926926, "total_norm": 0.45051389932632446}
|
| 3476 |
+
{"epoch": 19, "step": 3449, "loss": 0.060814905911684036, "total_norm": 0.3825909197330475}
|
| 3477 |
+
{"epoch": 19, "step": 3450, "loss": 0.06693698465824127, "total_norm": 0.46046268939971924}
|
| 3478 |
+
{"epoch": 19, "step": 3451, "loss": 0.07806388288736343, "total_norm": 0.35896995663642883}
|
| 3479 |
+
{"epoch": 19, "step": 3452, "loss": 0.07569962739944458, "total_norm": 0.3824521601200104}
|
| 3480 |
+
{"epoch": 19, "step": 3453, "loss": 0.07433344423770905, "total_norm": 0.43837347626686096}
|
| 3481 |
+
{"epoch": 19, "step": 3454, "loss": 0.07127473503351212, "total_norm": 0.3958043158054352}
|
| 3482 |
+
{"epoch": 19, "step": 3455, "loss": 0.07177051156759262, "total_norm": 0.44299301505088806}
|
| 3483 |
+
{"epoch": 19, "step": 3456, "loss": 0.06487474590539932, "total_norm": 0.40540966391563416}
|
| 3484 |
+
{"epoch": 19, "step": 3457, "loss": 0.06358999013900757, "total_norm": 0.3553120493888855}
|
| 3485 |
+
{"epoch": 19, "step": 3458, "loss": 0.07439502328634262, "total_norm": 0.4041728973388672}
|
| 3486 |
+
{"epoch": 19, "step": 3459, "loss": 0.06909509748220444, "total_norm": 0.38303014636039734}
|
| 3487 |
+
{"epoch": 19, "step": 3460, "loss": 0.06487569212913513, "total_norm": 0.4316783845424652}
|
| 3488 |
+
{"epoch": 19, "step": 3461, "loss": 0.06462734192609787, "total_norm": 0.39192458987236023}
|
| 3489 |
+
{"epoch": 19, "step": 3462, "loss": 0.06951399892568588, "total_norm": 0.43483564257621765}
|
| 3490 |
+
{"epoch": 19, "step": 3463, "loss": 0.06610497087240219, "total_norm": 0.4542086124420166}
|
| 3491 |
+
{"epoch": 19, "step": 3464, "loss": 0.07372887432575226, "total_norm": 0.4204694926738739}
|
| 3492 |
+
{"epoch": 19, "step": 3465, "loss": 0.08062244951725006, "total_norm": 0.4212283790111542}
|
| 3493 |
+
{"epoch": 19, "step": 3466, "loss": 0.06950519979000092, "total_norm": 0.37755486369132996}
|
| 3494 |
+
{"epoch": 19, "step": 3467, "loss": 0.07102422416210175, "total_norm": 0.4094632565975189}
|
| 3495 |
+
{"epoch": 19, "step": 3468, "loss": 0.060225386172533035, "total_norm": 0.3812105655670166}
|
| 3496 |
+
{"epoch": 19, "step": 3469, "loss": 0.06577896326780319, "total_norm": 0.4254424571990967}
|
| 3497 |
+
{"epoch": 19, "step": 3470, "loss": 0.06463371217250824, "total_norm": 0.40942075848579407}
|
| 3498 |
+
{"epoch": 19, "step": 3471, "loss": 0.07263350486755371, "total_norm": 0.3554999530315399}
|
| 3499 |
+
{"epoch": 19, "step": 3472, "loss": 0.07966326177120209, "total_norm": 0.4493215084075928}
|
| 3500 |
+
{"epoch": 19, "step": 3473, "loss": 0.06410136073827744, "total_norm": 0.3939339220523834}
|
| 3501 |
+
{"epoch": 19, "step": 3474, "loss": 0.07636649906635284, "total_norm": 0.4016420841217041}
|
| 3502 |
+
{"epoch": 19, "step": 3475, "loss": 0.07257033884525299, "total_norm": 0.4046069383621216}
|
| 3503 |
+
{"epoch": 19, "step": 3476, "loss": 0.07237204909324646, "total_norm": 0.39212438464164734}
|
| 3504 |
+
{"epoch": 19, "step": 3477, "loss": 0.06761867552995682, "total_norm": 0.42311716079711914}
|
| 3505 |
+
{"epoch": 19, "step": 3478, "loss": 0.06813068687915802, "total_norm": 0.37189480662345886}
|
| 3506 |
+
{"epoch": 19, "step": 3479, "loss": 0.07458849251270294, "total_norm": 0.3864719867706299}
|
| 3507 |
+
{"epoch": 19, "step": 3480, "loss": 0.06934340298175812, "total_norm": 0.4600399136543274}
|
| 3508 |
+
{"epoch": 19, "step": 3481, "loss": 0.06718819588422775, "total_norm": 0.4141007959842682}
|
| 3509 |
+
{"epoch": 19, "step": 3482, "loss": 0.06236109510064125, "total_norm": 0.36711663007736206}
|
| 3510 |
+
{"epoch": 19, "step": 3483, "loss": 0.06310893595218658, "total_norm": 0.42796778678894043}
|
| 3511 |
+
{"epoch": 19, "step": 3484, "loss": 0.07416108250617981, "total_norm": 0.4565519094467163}
|
| 3512 |
+
{"epoch": 19, "step": 3485, "loss": 0.06560900062322617, "total_norm": 0.3995119333267212}
|
| 3513 |
+
{"epoch": 19, "step": 3486, "loss": 0.06989232450723648, "total_norm": 0.4169926941394806}
|
| 3514 |
+
{"epoch": 19, "step": 3487, "loss": 0.07123791426420212, "total_norm": 0.39384472370147705}
|
| 3515 |
+
{"epoch": 19, "step": 3488, "loss": 0.07095292210578918, "total_norm": 0.4068931043148041}
|
| 3516 |
+
{"epoch": 19, "step": 3489, "loss": 0.06878367066383362, "total_norm": 0.41394373774528503}
|
| 3517 |
+
{"epoch": 19, "step": 3490, "loss": 0.06557326763868332, "total_norm": 0.41651469469070435}
|
| 3518 |
+
{"epoch": 19, "step": 3491, "loss": 0.07315421104431152, "total_norm": 0.4758340120315552}
|
| 3519 |
+
{"epoch": 19, "step": 3492, "loss": 0.07345067709684372, "total_norm": 0.3815555274486542}
|
| 3520 |
+
{"epoch": 19, "step": 3493, "loss": 0.07233898341655731, "total_norm": 0.4131311774253845}
|
| 3521 |
+
{"epoch": 19, "step": 3494, "loss": 0.07435332238674164, "total_norm": 0.41949114203453064}
|
| 3522 |
+
{"epoch": 19, "step": 3495, "loss": 0.06814467161893845, "total_norm": 0.387387752532959}
|
| 3523 |
+
{"epoch": 19, "step": 3496, "loss": 0.07183708995580673, "total_norm": 0.3951456844806671}
|
| 3524 |
+
{"epoch": 19, "step": 3497, "loss": 0.07346130162477493, "total_norm": 0.3674578368663788}
|
| 3525 |
+
{"epoch": 19, "step": 3498, "loss": 0.06627894937992096, "total_norm": 0.3926956057548523}
|
| 3526 |
+
{"epoch": 19, "step": 3499, "loss": 0.0776500403881073, "total_norm": 0.4104427695274353}
|
| 3527 |
+
{"epoch": 19, "step": 3500, "eval_loss": 5.0220290422439575, "eval_rougeL": 0.11872779423918271}
|
| 3528 |
+
{"epoch": 19, "step": 3500, "loss": 0.07207430899143219, "total_norm": 0.3702149987220764}
|
| 3529 |
+
{"epoch": 19, "step": 3501, "loss": 0.06158837303519249, "total_norm": 0.36664992570877075}
|
| 3530 |
+
{"epoch": 19, "step": 3502, "loss": 0.07010766118764877, "total_norm": 0.37997540831565857}
|
| 3531 |
+
{"epoch": 19, "step": 3503, "loss": 0.07500073313713074, "total_norm": 0.41642364859580994}
|
| 3532 |
+
{"epoch": 19, "step": 3504, "loss": 0.05905424803495407, "total_norm": 0.38566330075263977}
|
| 3533 |
+
{"epoch": 19, "step": 3505, "loss": 0.06727441400289536, "total_norm": 0.38475751876831055}
|
| 3534 |
+
{"epoch": 19, "step": 3506, "loss": 0.060138337314128876, "total_norm": 0.413043737411499}
|
| 3535 |
+
{"epoch": 19, "step": 3507, "loss": 0.06197485700249672, "total_norm": 0.3413826525211334}
|
| 3536 |
+
{"epoch": 19, "step": 3508, "loss": 0.06812082231044769, "total_norm": 0.38554421067237854}
|
| 3537 |
+
{"epoch": 19, "step": 3509, "loss": 0.0738016664981842, "total_norm": 0.45579251646995544}
|
| 3538 |
+
{"epoch": 19, "step": 3510, "loss": 0.06546374410390854, "total_norm": 0.3662172853946686}
|
| 3539 |
+
{"epoch": 19, "step": 3511, "loss": 0.06208968162536621, "total_norm": 0.39762791991233826}
|
| 3540 |
+
{"epoch": 19, "step": 3512, "loss": 0.07024646550416946, "total_norm": 0.5137607455253601}
|
| 3541 |
+
{"epoch": 19, "step": 3513, "loss": 0.07134389132261276, "total_norm": 0.4670751094818115}
|
| 3542 |
+
{"epoch": 19, "step": 3514, "loss": 0.07728653401136398, "total_norm": 0.3857883810997009}
|
| 3543 |
+
{"epoch": 19, "step": 3515, "loss": 0.06422069668769836, "total_norm": 0.4053337275981903}
|
| 3544 |
+
{"epoch": 19, "step": 3516, "loss": 0.06729034334421158, "total_norm": 0.38059550523757935}
|
| 3545 |
+
{"epoch": 19, "step": 3517, "loss": 0.07859373092651367, "total_norm": 0.48532700538635254}
|
| 3546 |
+
{"epoch": 19, "step": 3518, "loss": 0.060003913938999176, "total_norm": 0.35765382647514343}
|
| 3547 |
+
{"epoch": 19, "step": 3519, "loss": 0.07169795781373978, "total_norm": 0.3875722885131836}
|
| 3548 |
+
{"epoch": 19, "step": 3520, "loss": 0.06319695711135864, "total_norm": 0.3974921703338623}
|
| 3549 |
+
{"epoch": 19, "step": 3521, "loss": 0.07936239242553711, "total_norm": 0.4573519229888916}
|
| 3550 |
+
{"epoch": 19, "step": 3522, "loss": 0.06578812748193741, "total_norm": 0.39051687717437744}
|
| 3551 |
+
{"epoch": 19, "step": 3523, "loss": 0.06445008516311646, "total_norm": 0.39269962906837463}
|
| 3552 |
+
{"epoch": 19, "step": 3524, "loss": 0.07205944508314133, "total_norm": 0.3946966826915741}
|
| 3553 |
+
{"epoch": 19, "step": 3525, "loss": 0.06935378164052963, "total_norm": 0.40876826643943787}
|
| 3554 |
+
{"epoch": 19, "step": 3526, "loss": 0.06792822480201721, "total_norm": 0.39337751269340515}
|
| 3555 |
+
{"epoch": 19, "step": 3527, "loss": 0.07395108789205551, "total_norm": 0.4483739733695984}
|
| 3556 |
+
{"epoch": 19, "step": 3528, "loss": 0.06934802234172821, "total_norm": 0.3965384066104889}
|
| 3557 |
+
{"epoch": 19, "step": 3529, "loss": 0.0827133059501648, "total_norm": 0.44291070103645325}
|
| 3558 |
+
{"epoch": 19, "step": 3530, "loss": 0.07297205179929733, "total_norm": 0.47151389718055725}
|
| 3559 |
+
{"epoch": 19, "step": 3531, "loss": 0.06897492706775665, "total_norm": 0.3717750012874603}
|
| 3560 |
+
{"epoch": 19, "step": 3532, "loss": 0.06802594661712646, "total_norm": 0.3665638864040375}
|
| 3561 |
+
{"epoch": 19, "step": 3533, "loss": 0.07631628960371017, "total_norm": 0.4539519250392914}
|
| 3562 |
+
{"epoch": 19, "step": 3534, "loss": 0.06562040001153946, "total_norm": 0.3907945156097412}
|
| 3563 |
+
{"epoch": 19, "step": 3535, "loss": 0.07131240516901016, "total_norm": 0.40537747740745544}
|
| 3564 |
+
{"epoch": 19, "step": 3536, "loss": 0.06623981148004532, "total_norm": 0.37212270498275757}
|
| 3565 |
+
{"epoch": 19, "step": 3537, "loss": 0.05907493829727173, "total_norm": 0.3867167532444}
|
| 3566 |
+
{"epoch": 19, "step": 3538, "loss": 0.060727182775735855, "total_norm": 0.3760104775428772}
|
| 3567 |
+
{"epoch": 19, "step": 3539, "loss": 0.06602499634027481, "total_norm": 0.3716023564338684}
|
| 3568 |
+
{"epoch": 19, "step": 3540, "loss": 0.0792287215590477, "total_norm": 0.4493481516838074}
|
| 3569 |
+
{"epoch": 19, "step": 3541, "loss": 0.0659322738647461, "total_norm": 0.366845041513443}
|
| 3570 |
+
{"epoch": 19, "step": 3542, "loss": 0.06888958811759949, "total_norm": 0.38213247060775757}
|
| 3571 |
+
{"epoch": 19, "step": 3543, "loss": 0.07498490065336227, "total_norm": 0.39568865299224854}
|
| 3572 |
+
{"epoch": 19, "step": 3544, "loss": 0.06685784459114075, "total_norm": 0.3744715750217438}
|
| 3573 |
+
{"epoch": 19, "step": 3545, "loss": 0.06664440035820007, "total_norm": 0.40962454676628113}
|
| 3574 |
+
{"epoch": 19, "step": 3546, "loss": 0.06505069881677628, "total_norm": 0.3674473464488983}
|
| 3575 |
+
{"epoch": 19, "step": 3547, "loss": 0.06825530529022217, "total_norm": 0.42193111777305603}
|
| 3576 |
+
{"epoch": 19, "step": 3548, "loss": 0.07147165387868881, "total_norm": 0.4118800759315491}
|
| 3577 |
+
{"epoch": 19, "step": 3549, "loss": 0.08126255124807358, "total_norm": 0.4317931532859802}
|
| 3578 |
+
{"epoch": 19, "step": 3550, "loss": 0.07912220060825348, "total_norm": 0.4415907561779022}
|
| 3579 |
+
{"epoch": 19, "step": 3551, "loss": 0.07138849794864655, "total_norm": 0.3974316418170929}
|
| 3580 |
+
{"epoch": 19, "step": 3552, "loss": 0.07288537174463272, "total_norm": 0.5247356295585632}
|
| 3581 |
+
{"epoch": 19, "step": 3553, "loss": 0.0735028013586998, "total_norm": 0.42138493061065674}
|
| 3582 |
+
{"epoch": 19, "step": 3554, "loss": 0.07932400703430176, "total_norm": 0.4254576563835144}
|
| 3583 |
+
{"epoch": 19, "step": 3555, "loss": 0.0751042515039444, "total_norm": 0.4099126160144806}
|
| 3584 |
+
{"epoch": 19, "step": 3556, "loss": 0.0627933219075203, "total_norm": 0.3664637804031372}
|
| 3585 |
+
{"epoch": 19, "step": 3557, "loss": 0.06501071900129318, "total_norm": 0.3542080521583557}
|
| 3586 |
+
{"epoch": 19, "step": 3558, "loss": 0.07052960246801376, "total_norm": 0.44213640689849854}
|
| 3587 |
+
{"epoch": 19, "step": 3559, "loss": 0.06646731495857239, "total_norm": 0.396994024515152}
|
| 3588 |
+
{"epoch": 19, "step": 3560, "loss": 0.059404127299785614, "total_norm": 0.3919144570827484}
|
| 3589 |
+
{"epoch": 19, "step": 3561, "loss": 0.0678473636507988, "total_norm": 0.3929302990436554}
|
| 3590 |
+
{"epoch": 19, "step": 3562, "loss": 0.06528989225625992, "total_norm": 0.40572378039360046}
|
| 3591 |
+
{"epoch": 19, "step": 3563, "loss": 0.06991353631019592, "total_norm": 0.38744571805000305}
|
| 3592 |
+
{"epoch": 19, "step": 3564, "loss": 0.0819733589887619, "total_norm": 0.3923647999763489}
|
| 3593 |
+
{"epoch": 19, "step": 3565, "loss": 0.0787283182144165, "total_norm": 0.43464845418930054}
|
| 3594 |
+
{"epoch": 19, "step": 3566, "loss": 0.07298606634140015, "total_norm": 0.43444889783859253}
|
| 3595 |
+
{"epoch": 19, "step": 3567, "loss": 0.05901186913251877, "total_norm": 0.4918997287750244}
|
| 3596 |
+
{"epoch": 19, "step": 3568, "loss": 0.07650952786207199, "total_norm": 0.4300938844680786}
|
| 3597 |
+
{"epoch": 19, "step": 3569, "loss": 0.07255873084068298, "total_norm": 0.4154261350631714}
|
| 3598 |
+
{"epoch": 19, "step": 3570, "loss": 0.07922214269638062, "total_norm": 0.41404852271080017}
|
| 3599 |
+
{"epoch": 19, "step": 3571, "loss": 0.068200021982193, "total_norm": 0.4082469940185547}
|
| 3600 |
+
{"epoch": 19, "step": 3572, "loss": 0.07221460342407227, "total_norm": 0.4273666739463806}
|
| 3601 |
+
{"epoch": 19, "step": 3573, "loss": 0.07680253684520721, "total_norm": 0.47549837827682495}
|
| 3602 |
+
{"epoch": 19, "step": 3574, "loss": 0.0700637474656105, "total_norm": 0.3978300392627716}
|
| 3603 |
+
{"epoch": 19, "step": 3575, "loss": 0.06955338269472122, "total_norm": 0.41550689935684204}
|
| 3604 |
+
{"epoch": 19, "step": 3576, "loss": 0.07201962172985077, "total_norm": 0.3722953498363495}
|
| 3605 |
+
{"epoch": 19, "step": 3577, "loss": 0.07467301934957504, "total_norm": 0.444408118724823}
|
| 3606 |
+
{"epoch": 19, "step": 3578, "loss": 0.06600163131952286, "total_norm": 0.4090379774570465}
|
| 3607 |
+
{"epoch": 19, "step": 3579, "loss": 0.052284255623817444, "total_norm": 0.4305051267147064}
|
| 3608 |
+
{"epoch": 19, "step": 3580, "eval_loss": 5.02215451002121, "eval_rougeL": 0.11894351165818984}
|