Đào Quốc Tuấn commited on
Upload folder using huggingface_hub
Browse files- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/config.json +39 -0
- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/generation_config.json +6 -0
- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/merges.txt +0 -0
- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/model.safetensors +3 -0
- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/special_tokens_map.json +6 -0
- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/tokenizer.json +0 -0
- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/tokenizer_config.json +21 -0
- experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/vocab.json +0 -0
- experiments/sft_gpt2-120m/20251117_231920/sft_gpt2-120m-3.log +29 -0
- experiments/sft_gpt2-120m/20251117_231920/sft_gpt2-120m-3_metrics.jsonl +293 -0
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_function": "gelu_new",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"GPT2LMHeadModel"
|
| 5 |
+
],
|
| 6 |
+
"attn_pdrop": 0.1,
|
| 7 |
+
"bos_token_id": 50256,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 50256,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 1024,
|
| 20 |
+
"pad_token_id": 50256,
|
| 21 |
+
"reorder_and_upcast_attn": false,
|
| 22 |
+
"resid_pdrop": 0.1,
|
| 23 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 24 |
+
"scale_attn_weights": true,
|
| 25 |
+
"summary_activation": null,
|
| 26 |
+
"summary_first_dropout": 0.1,
|
| 27 |
+
"summary_proj_to_labels": true,
|
| 28 |
+
"summary_type": "cls_index",
|
| 29 |
+
"summary_use_proj": true,
|
| 30 |
+
"task_specific_params": {
|
| 31 |
+
"text-generation": {
|
| 32 |
+
"do_sample": true,
|
| 33 |
+
"max_length": 50
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"transformers_version": "4.56.0",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 50257
|
| 39 |
+
}
|
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.56.0"
|
| 6 |
+
}
|
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92a33bb29e69452a773b8b7a24e3512189dc63e3ab7955cfc7ce3f6b787bff9c
|
| 3 |
+
size 497774208
|
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<|endoftext|>",
|
| 3 |
+
"eos_token": "<|endoftext|>",
|
| 4 |
+
"pad_token": "<|endoftext|>",
|
| 5 |
+
"unk_token": "<|endoftext|>"
|
| 6 |
+
}
|
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/tokenizer_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"50256": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"bos_token": "<|endoftext|>",
|
| 14 |
+
"clean_up_tokenization_spaces": false,
|
| 15 |
+
"eos_token": "<|endoftext|>",
|
| 16 |
+
"extra_special_tokens": {},
|
| 17 |
+
"model_max_length": 1024,
|
| 18 |
+
"pad_token": "<|endoftext|>",
|
| 19 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 20 |
+
"unk_token": "<|endoftext|>"
|
| 21 |
+
}
|
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/sft_gpt2-120m/20251117_231920/sft_gpt2-120m-3.log
CHANGED
|
@@ -560,3 +560,32 @@
|
|
| 560 |
2025-11-18 00:09:15,395 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 561 |
2025-11-18 00:09:18,929 - root - [32m[1mINFO[0m - Epoch 19/20 eval loss: 3.511649340391159, eval rougeL: 0.11313993216799487
|
| 562 |
2025-11-18 00:09:19,670 - root - [32m[1mINFO[0m - Epoch 20/20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
2025-11-18 00:09:15,395 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 561 |
2025-11-18 00:09:18,929 - root - [32m[1mINFO[0m - Epoch 19/20 eval loss: 3.511649340391159, eval rougeL: 0.11313993216799487
|
| 562 |
2025-11-18 00:09:19,670 - root - [32m[1mINFO[0m - Epoch 20/20
|
| 563 |
+
2025-11-18 00:09:43,699 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 564 |
+
2025-11-18 00:09:45,654 - root - [32m[1mINFO[0m - Step 6901/7160 train rougeL: 0.11803548722890815
|
| 565 |
+
2025-11-18 00:09:45,809 - root - [32m[1mINFO[0m - Step 6901/7160 loss: 1.069823145866394, total_norm: 2.2482874393463135
|
| 566 |
+
2025-11-18 00:10:09,937 - root - [32m[1mINFO[0m - Step 7001/7160 finished
|
| 567 |
+
2025-11-18 00:10:10,271 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 568 |
+
2025-11-18 00:10:14,817 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 569 |
+
2025-11-18 00:10:19,273 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 570 |
+
2025-11-18 00:10:23,753 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 571 |
+
2025-11-18 00:10:28,307 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 572 |
+
2025-11-18 00:10:32,780 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 573 |
+
2025-11-18 00:10:37,246 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 574 |
+
2025-11-18 00:10:41,648 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 575 |
+
2025-11-18 00:10:45,182 - root - [32m[1mINFO[0m - Epoch 20/20 eval loss: 3.5214541256427765, eval rougeL: 0.11370361682600127
|
| 576 |
+
2025-11-18 00:10:45,269 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 577 |
+
2025-11-18 00:10:47,242 - root - [32m[1mINFO[0m - Step 7001/7160 train rougeL: 0.15675778332370002
|
| 578 |
+
2025-11-18 00:10:47,397 - root - [32m[1mINFO[0m - Step 7001/7160 loss: 1.1128922700881958, total_norm: 2.13863205909729
|
| 579 |
+
2025-11-18 00:11:11,587 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 580 |
+
2025-11-18 00:11:13,573 - root - [32m[1mINFO[0m - Step 7101/7160 train rougeL: 0.15740878852750026
|
| 581 |
+
2025-11-18 00:11:13,728 - root - [32m[1mINFO[0m - Step 7101/7160 loss: 1.055530309677124, total_norm: 2.0044915676116943
|
| 582 |
+
2025-11-18 00:11:27,950 - root - [32m[1mINFO[0m - Epoch 20/20 finished
|
| 583 |
+
2025-11-18 00:11:28,276 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 584 |
+
2025-11-18 00:11:32,792 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 585 |
+
2025-11-18 00:11:37,230 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 586 |
+
2025-11-18 00:11:41,776 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 587 |
+
2025-11-18 00:11:46,583 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 588 |
+
2025-11-18 00:11:51,036 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 589 |
+
2025-11-18 00:11:55,488 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 590 |
+
2025-11-18 00:11:59,893 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 591 |
+
2025-11-18 00:12:03,418 - root - [32m[1mINFO[0m - Epoch 20/20 eval loss: 3.5230754017829895, eval rougeL: 0.11275351405322884
|
experiments/sft_gpt2-120m/20251117_231920/sft_gpt2-120m-3_metrics.jsonl
CHANGED
|
@@ -6900,3 +6900,296 @@
|
|
| 6900 |
{"epoch": 19, "step": 6866, "loss": 0.8937848806381226, "total_norm": 2.082096576690674}
|
| 6901 |
{"epoch": 19, "step": 6867, "loss": 1.0529240369796753, "total_norm": 2.2550768852233887}
|
| 6902 |
{"epoch": 19, "step": 6868, "loss": 1.0548574924468994, "total_norm": 2.001913070678711}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6900 |
{"epoch": 19, "step": 6866, "loss": 0.8937848806381226, "total_norm": 2.082096576690674}
|
| 6901 |
{"epoch": 19, "step": 6867, "loss": 1.0529240369796753, "total_norm": 2.2550768852233887}
|
| 6902 |
{"epoch": 19, "step": 6868, "loss": 1.0548574924468994, "total_norm": 2.001913070678711}
|
| 6903 |
+
{"epoch": 19, "step": 6869, "loss": 0.956845760345459, "total_norm": 1.8884351253509521}
|
| 6904 |
+
{"epoch": 19, "step": 6870, "loss": 0.953325629234314, "total_norm": 2.191408634185791}
|
| 6905 |
+
{"epoch": 19, "step": 6871, "loss": 1.0214215517044067, "total_norm": 2.2155375480651855}
|
| 6906 |
+
{"epoch": 19, "step": 6872, "loss": 0.9833791255950928, "total_norm": 2.110361337661743}
|
| 6907 |
+
{"epoch": 19, "step": 6873, "loss": 0.7849990725517273, "total_norm": 1.9442347288131714}
|
| 6908 |
+
{"epoch": 19, "step": 6874, "loss": 0.9336222410202026, "total_norm": 1.9033899307250977}
|
| 6909 |
+
{"epoch": 19, "step": 6875, "loss": 1.1075769662857056, "total_norm": 1.919631838798523}
|
| 6910 |
+
{"epoch": 19, "step": 6876, "loss": 1.054406762123108, "total_norm": 2.438117265701294}
|
| 6911 |
+
{"epoch": 19, "step": 6877, "loss": 0.7375928163528442, "total_norm": 1.8591747283935547}
|
| 6912 |
+
{"epoch": 19, "step": 6878, "loss": 0.9381473660469055, "total_norm": 2.3920321464538574}
|
| 6913 |
+
{"epoch": 19, "step": 6879, "loss": 1.0483653545379639, "total_norm": 1.9371604919433594}
|
| 6914 |
+
{"epoch": 19, "step": 6880, "loss": 0.8435301780700684, "total_norm": 2.0042455196380615}
|
| 6915 |
+
{"epoch": 19, "step": 6881, "loss": 0.9901301860809326, "total_norm": 2.222156286239624}
|
| 6916 |
+
{"epoch": 19, "step": 6882, "loss": 0.994422972202301, "total_norm": 2.2989017963409424}
|
| 6917 |
+
{"epoch": 19, "step": 6883, "loss": 0.8921369910240173, "total_norm": 2.464988946914673}
|
| 6918 |
+
{"epoch": 19, "step": 6884, "loss": 0.9643175005912781, "total_norm": 2.2248263359069824}
|
| 6919 |
+
{"epoch": 19, "step": 6885, "loss": 1.0734280347824097, "total_norm": 2.202399969100952}
|
| 6920 |
+
{"epoch": 19, "step": 6886, "loss": 1.1113524436950684, "total_norm": 2.1435580253601074}
|
| 6921 |
+
{"epoch": 19, "step": 6887, "loss": 1.1511136293411255, "total_norm": 2.174923896789551}
|
| 6922 |
+
{"epoch": 19, "step": 6888, "loss": 1.0782922506332397, "total_norm": 2.0772650241851807}
|
| 6923 |
+
{"epoch": 19, "step": 6889, "loss": 1.0159008502960205, "total_norm": 2.4715816974639893}
|
| 6924 |
+
{"epoch": 19, "step": 6890, "loss": 0.9337886571884155, "total_norm": 2.1779024600982666}
|
| 6925 |
+
{"epoch": 19, "step": 6891, "loss": 0.9618430137634277, "total_norm": 2.1075124740600586}
|
| 6926 |
+
{"epoch": 19, "step": 6892, "loss": 0.6936129927635193, "total_norm": 2.2419233322143555}
|
| 6927 |
+
{"epoch": 19, "step": 6893, "loss": 1.0113847255706787, "total_norm": 2.076672077178955}
|
| 6928 |
+
{"epoch": 19, "step": 6894, "loss": 0.9418612718582153, "total_norm": 1.9536205530166626}
|
| 6929 |
+
{"epoch": 19, "step": 6895, "loss": 0.8445526361465454, "total_norm": 1.8435953855514526}
|
| 6930 |
+
{"epoch": 19, "step": 6896, "loss": 0.8890517950057983, "total_norm": 2.3285293579101562}
|
| 6931 |
+
{"epoch": 19, "step": 6897, "loss": 0.8605286478996277, "total_norm": 2.403531789779663}
|
| 6932 |
+
{"epoch": 19, "step": 6898, "loss": 0.936241865158081, "total_norm": 2.2462048530578613}
|
| 6933 |
+
{"epoch": 19, "step": 6899, "loss": 0.985892653465271, "total_norm": 2.361196279525757}
|
| 6934 |
+
{"epoch": 19, "step": 6900, "loss": 1.069823145866394, "total_norm": 2.2482874393463135}
|
| 6935 |
+
{"epoch": 19, "step": 6901, "loss": 0.949264407157898, "total_norm": 2.4188482761383057}
|
| 6936 |
+
{"epoch": 19, "step": 6902, "loss": 0.8783237338066101, "total_norm": 2.3609557151794434}
|
| 6937 |
+
{"epoch": 19, "step": 6903, "loss": 1.020238995552063, "total_norm": 2.260701894760132}
|
| 6938 |
+
{"epoch": 19, "step": 6904, "loss": 1.0693256855010986, "total_norm": 2.276893138885498}
|
| 6939 |
+
{"epoch": 19, "step": 6905, "loss": 0.9427741169929504, "total_norm": 2.252544641494751}
|
| 6940 |
+
{"epoch": 19, "step": 6906, "loss": 0.9233540892601013, "total_norm": 1.9294661283493042}
|
| 6941 |
+
{"epoch": 19, "step": 6907, "loss": 1.0738775730133057, "total_norm": 2.1937506198883057}
|
| 6942 |
+
{"epoch": 19, "step": 6908, "loss": 0.947857141494751, "total_norm": 2.7249279022216797}
|
| 6943 |
+
{"epoch": 19, "step": 6909, "loss": 0.9879264831542969, "total_norm": 2.0465290546417236}
|
| 6944 |
+
{"epoch": 19, "step": 6910, "loss": 1.0990848541259766, "total_norm": 2.1441245079040527}
|
| 6945 |
+
{"epoch": 19, "step": 6911, "loss": 0.9513936042785645, "total_norm": 2.3086674213409424}
|
| 6946 |
+
{"epoch": 19, "step": 6912, "loss": 1.040450096130371, "total_norm": 2.037036418914795}
|
| 6947 |
+
{"epoch": 19, "step": 6913, "loss": 0.9788554906845093, "total_norm": 2.106426477432251}
|
| 6948 |
+
{"epoch": 19, "step": 6914, "loss": 0.9122462272644043, "total_norm": 1.6863927841186523}
|
| 6949 |
+
{"epoch": 19, "step": 6915, "loss": 0.8810529708862305, "total_norm": 1.8880096673965454}
|
| 6950 |
+
{"epoch": 19, "step": 6916, "loss": 0.9956991076469421, "total_norm": 2.1052911281585693}
|
| 6951 |
+
{"epoch": 19, "step": 6917, "loss": 0.7554677128791809, "total_norm": 2.2126879692077637}
|
| 6952 |
+
{"epoch": 19, "step": 6918, "loss": 0.8935810923576355, "total_norm": 2.1456363201141357}
|
| 6953 |
+
{"epoch": 19, "step": 6919, "loss": 0.7286329865455627, "total_norm": 2.2539279460906982}
|
| 6954 |
+
{"epoch": 19, "step": 6920, "loss": 0.9911085963249207, "total_norm": 1.983747959136963}
|
| 6955 |
+
{"epoch": 19, "step": 6921, "loss": 0.7945812344551086, "total_norm": 1.9764539003372192}
|
| 6956 |
+
{"epoch": 19, "step": 6922, "loss": 1.0102192163467407, "total_norm": 2.3750619888305664}
|
| 6957 |
+
{"epoch": 19, "step": 6923, "loss": 1.0208792686462402, "total_norm": 2.213378429412842}
|
| 6958 |
+
{"epoch": 19, "step": 6924, "loss": 0.9644502997398376, "total_norm": 2.5459516048431396}
|
| 6959 |
+
{"epoch": 19, "step": 6925, "loss": 0.9470763802528381, "total_norm": 1.8927931785583496}
|
| 6960 |
+
{"epoch": 19, "step": 6926, "loss": 1.0154966115951538, "total_norm": 2.203429937362671}
|
| 6961 |
+
{"epoch": 19, "step": 6927, "loss": 0.9094064235687256, "total_norm": 2.013036012649536}
|
| 6962 |
+
{"epoch": 19, "step": 6928, "loss": 1.1079026460647583, "total_norm": 2.0803496837615967}
|
| 6963 |
+
{"epoch": 19, "step": 6929, "loss": 1.058545470237732, "total_norm": 1.8330177068710327}
|
| 6964 |
+
{"epoch": 19, "step": 6930, "loss": 1.0633176565170288, "total_norm": 2.139163017272949}
|
| 6965 |
+
{"epoch": 19, "step": 6931, "loss": 1.0066580772399902, "total_norm": 2.2102503776550293}
|
| 6966 |
+
{"epoch": 19, "step": 6932, "loss": 1.0006203651428223, "total_norm": 2.446329116821289}
|
| 6967 |
+
{"epoch": 19, "step": 6933, "loss": 0.9236804842948914, "total_norm": 2.237297534942627}
|
| 6968 |
+
{"epoch": 19, "step": 6934, "loss": 0.8919469714164734, "total_norm": 2.0536489486694336}
|
| 6969 |
+
{"epoch": 19, "step": 6935, "loss": 0.7509689331054688, "total_norm": 2.007594108581543}
|
| 6970 |
+
{"epoch": 19, "step": 6936, "loss": 0.8987389206886292, "total_norm": 2.287471294403076}
|
| 6971 |
+
{"epoch": 19, "step": 6937, "loss": 0.8933175802230835, "total_norm": 2.3542613983154297}
|
| 6972 |
+
{"epoch": 19, "step": 6938, "loss": 1.0282896757125854, "total_norm": 1.9594788551330566}
|
| 6973 |
+
{"epoch": 19, "step": 6939, "loss": 0.8692252039909363, "total_norm": 2.225510835647583}
|
| 6974 |
+
{"epoch": 19, "step": 6940, "loss": 1.038893461227417, "total_norm": 2.094167470932007}
|
| 6975 |
+
{"epoch": 19, "step": 6941, "loss": 0.8996456265449524, "total_norm": 2.2807247638702393}
|
| 6976 |
+
{"epoch": 19, "step": 6942, "loss": 0.9856517910957336, "total_norm": 2.219956874847412}
|
| 6977 |
+
{"epoch": 19, "step": 6943, "loss": 0.9309678673744202, "total_norm": 2.151196241378784}
|
| 6978 |
+
{"epoch": 19, "step": 6944, "loss": 0.990230143070221, "total_norm": 1.9134916067123413}
|
| 6979 |
+
{"epoch": 19, "step": 6945, "loss": 0.9145349264144897, "total_norm": 2.4225001335144043}
|
| 6980 |
+
{"epoch": 19, "step": 6946, "loss": 0.9196827411651611, "total_norm": 1.9182929992675781}
|
| 6981 |
+
{"epoch": 19, "step": 6947, "loss": 0.9659733176231384, "total_norm": 2.0149614810943604}
|
| 6982 |
+
{"epoch": 19, "step": 6948, "loss": 0.9372978806495667, "total_norm": 1.915818691253662}
|
| 6983 |
+
{"epoch": 19, "step": 6949, "loss": 1.0568420886993408, "total_norm": 2.6204426288604736}
|
| 6984 |
+
{"epoch": 19, "step": 6950, "loss": 0.7919695377349854, "total_norm": 1.7510161399841309}
|
| 6985 |
+
{"epoch": 19, "step": 6951, "loss": 1.0266350507736206, "total_norm": 2.236429214477539}
|
| 6986 |
+
{"epoch": 19, "step": 6952, "loss": 0.9685720205307007, "total_norm": 2.0537679195404053}
|
| 6987 |
+
{"epoch": 19, "step": 6953, "loss": 0.9450114369392395, "total_norm": 1.9971708059310913}
|
| 6988 |
+
{"epoch": 19, "step": 6954, "loss": 1.142397403717041, "total_norm": 1.818435549736023}
|
| 6989 |
+
{"epoch": 19, "step": 6955, "loss": 0.8478761315345764, "total_norm": 2.23006010055542}
|
| 6990 |
+
{"epoch": 19, "step": 6956, "loss": 1.0589072704315186, "total_norm": 1.9676119089126587}
|
| 6991 |
+
{"epoch": 19, "step": 6957, "loss": 1.0863511562347412, "total_norm": 1.9803886413574219}
|
| 6992 |
+
{"epoch": 19, "step": 6958, "loss": 0.8859145641326904, "total_norm": 2.3226351737976074}
|
| 6993 |
+
{"epoch": 19, "step": 6959, "loss": 0.9547861218452454, "total_norm": 2.2813353538513184}
|
| 6994 |
+
{"epoch": 19, "step": 6960, "loss": 1.0618929862976074, "total_norm": 2.1635935306549072}
|
| 6995 |
+
{"epoch": 19, "step": 6961, "loss": 0.994769811630249, "total_norm": 2.334472417831421}
|
| 6996 |
+
{"epoch": 19, "step": 6962, "loss": 0.9463576674461365, "total_norm": 2.3125622272491455}
|
| 6997 |
+
{"epoch": 19, "step": 6963, "loss": 0.7869423031806946, "total_norm": 2.181838274002075}
|
| 6998 |
+
{"epoch": 19, "step": 6964, "loss": 0.9782577157020569, "total_norm": 1.8346328735351562}
|
| 6999 |
+
{"epoch": 19, "step": 6965, "loss": 1.0985389947891235, "total_norm": 2.5115554332733154}
|
| 7000 |
+
{"epoch": 19, "step": 6966, "loss": 1.0277409553527832, "total_norm": 2.5366673469543457}
|
| 7001 |
+
{"epoch": 19, "step": 6967, "loss": 1.1022998094558716, "total_norm": 2.5560286045074463}
|
| 7002 |
+
{"epoch": 19, "step": 6968, "loss": 1.0439813137054443, "total_norm": 2.3058342933654785}
|
| 7003 |
+
{"epoch": 19, "step": 6969, "loss": 0.8692848086357117, "total_norm": 2.2116386890411377}
|
| 7004 |
+
{"epoch": 19, "step": 6970, "loss": 1.058517336845398, "total_norm": 2.149120569229126}
|
| 7005 |
+
{"epoch": 19, "step": 6971, "loss": 0.9467601180076599, "total_norm": 1.9894059896469116}
|
| 7006 |
+
{"epoch": 19, "step": 6972, "loss": 1.0430189371109009, "total_norm": 1.8023005723953247}
|
| 7007 |
+
{"epoch": 19, "step": 6973, "loss": 0.8603999614715576, "total_norm": 1.7587926387786865}
|
| 7008 |
+
{"epoch": 19, "step": 6974, "loss": 0.970477819442749, "total_norm": 2.0181000232696533}
|
| 7009 |
+
{"epoch": 19, "step": 6975, "loss": 0.9328159093856812, "total_norm": 1.8307887315750122}
|
| 7010 |
+
{"epoch": 19, "step": 6976, "loss": 1.106137752532959, "total_norm": 1.9601081609725952}
|
| 7011 |
+
{"epoch": 19, "step": 6977, "loss": 0.9814282655715942, "total_norm": 2.1260533332824707}
|
| 7012 |
+
{"epoch": 19, "step": 6978, "loss": 0.9663004875183105, "total_norm": 2.0001559257507324}
|
| 7013 |
+
{"epoch": 19, "step": 6979, "loss": 0.9951993227005005, "total_norm": 2.4507079124450684}
|
| 7014 |
+
{"epoch": 19, "step": 6980, "loss": 0.9260803461074829, "total_norm": 1.9259378910064697}
|
| 7015 |
+
{"epoch": 19, "step": 6981, "loss": 0.952734649181366, "total_norm": 1.8264514207839966}
|
| 7016 |
+
{"epoch": 19, "step": 6982, "loss": 0.9772632122039795, "total_norm": 2.3740181922912598}
|
| 7017 |
+
{"epoch": 19, "step": 6983, "loss": 0.9412364363670349, "total_norm": 2.187812089920044}
|
| 7018 |
+
{"epoch": 19, "step": 6984, "loss": 1.09848952293396, "total_norm": 2.0916037559509277}
|
| 7019 |
+
{"epoch": 19, "step": 6985, "loss": 0.764323353767395, "total_norm": 1.8276053667068481}
|
| 7020 |
+
{"epoch": 19, "step": 6986, "loss": 0.817112386226654, "total_norm": 1.8207013607025146}
|
| 7021 |
+
{"epoch": 19, "step": 6987, "loss": 1.0955400466918945, "total_norm": 2.088474988937378}
|
| 7022 |
+
{"epoch": 19, "step": 6988, "loss": 0.9856970310211182, "total_norm": 2.1141703128814697}
|
| 7023 |
+
{"epoch": 19, "step": 6989, "loss": 1.0238953828811646, "total_norm": 2.241731882095337}
|
| 7024 |
+
{"epoch": 19, "step": 6990, "loss": 0.9518187046051025, "total_norm": 2.2154009342193604}
|
| 7025 |
+
{"epoch": 19, "step": 6991, "loss": 1.0417637825012207, "total_norm": 3.1767048835754395}
|
| 7026 |
+
{"epoch": 19, "step": 6992, "loss": 1.1720343828201294, "total_norm": 2.4203708171844482}
|
| 7027 |
+
{"epoch": 19, "step": 6993, "loss": 1.0154680013656616, "total_norm": 1.948460578918457}
|
| 7028 |
+
{"epoch": 19, "step": 6994, "loss": 1.0946097373962402, "total_norm": 2.164571762084961}
|
| 7029 |
+
{"epoch": 19, "step": 6995, "loss": 1.0322253704071045, "total_norm": 1.8893214464187622}
|
| 7030 |
+
{"epoch": 19, "step": 6996, "loss": 0.8894084692001343, "total_norm": 2.4046294689178467}
|
| 7031 |
+
{"epoch": 19, "step": 6997, "loss": 0.8197526931762695, "total_norm": 1.986844539642334}
|
| 7032 |
+
{"epoch": 19, "step": 6998, "loss": 1.1723427772521973, "total_norm": 1.7992804050445557}
|
| 7033 |
+
{"epoch": 19, "step": 6999, "loss": 0.9717972278594971, "total_norm": 2.354714870452881}
|
| 7034 |
+
{"epoch": 19, "step": 7000, "eval_loss": 3.5214541256427765, "eval_rougeL": 0.11370361682600127}
|
| 7035 |
+
{"epoch": 19, "step": 7000, "loss": 1.1128922700881958, "total_norm": 2.13863205909729}
|
| 7036 |
+
{"epoch": 19, "step": 7001, "loss": 0.7528970837593079, "total_norm": 2.5054962635040283}
|
| 7037 |
+
{"epoch": 19, "step": 7002, "loss": 0.9513599872589111, "total_norm": 2.2755606174468994}
|
| 7038 |
+
{"epoch": 19, "step": 7003, "loss": 0.7086869478225708, "total_norm": 2.0153772830963135}
|
| 7039 |
+
{"epoch": 19, "step": 7004, "loss": 0.9868971705436707, "total_norm": 2.0628716945648193}
|
| 7040 |
+
{"epoch": 19, "step": 7005, "loss": 1.0734626054763794, "total_norm": 2.3364882469177246}
|
| 7041 |
+
{"epoch": 19, "step": 7006, "loss": 0.9595001935958862, "total_norm": 1.8835570812225342}
|
| 7042 |
+
{"epoch": 19, "step": 7007, "loss": 0.9325742125511169, "total_norm": 1.9833565950393677}
|
| 7043 |
+
{"epoch": 19, "step": 7008, "loss": 0.9694049954414368, "total_norm": 2.2450594902038574}
|
| 7044 |
+
{"epoch": 19, "step": 7009, "loss": 0.9217339158058167, "total_norm": 2.266529083251953}
|
| 7045 |
+
{"epoch": 19, "step": 7010, "loss": 0.7389083504676819, "total_norm": 2.0624325275421143}
|
| 7046 |
+
{"epoch": 19, "step": 7011, "loss": 0.9662838578224182, "total_norm": 2.648411989212036}
|
| 7047 |
+
{"epoch": 19, "step": 7012, "loss": 0.9563907384872437, "total_norm": 2.4482197761535645}
|
| 7048 |
+
{"epoch": 19, "step": 7013, "loss": 1.0345064401626587, "total_norm": 2.3491077423095703}
|
| 7049 |
+
{"epoch": 19, "step": 7014, "loss": 0.9711717367172241, "total_norm": 2.1522367000579834}
|
| 7050 |
+
{"epoch": 19, "step": 7015, "loss": 1.0831859111785889, "total_norm": 2.526824951171875}
|
| 7051 |
+
{"epoch": 19, "step": 7016, "loss": 1.0964473485946655, "total_norm": 2.261654853820801}
|
| 7052 |
+
{"epoch": 19, "step": 7017, "loss": 0.941961944103241, "total_norm": 2.196976661682129}
|
| 7053 |
+
{"epoch": 19, "step": 7018, "loss": 1.0179048776626587, "total_norm": 1.8604336977005005}
|
| 7054 |
+
{"epoch": 19, "step": 7019, "loss": 1.031291127204895, "total_norm": 2.1300387382507324}
|
| 7055 |
+
{"epoch": 19, "step": 7020, "loss": 0.8688944578170776, "total_norm": 2.065596580505371}
|
| 7056 |
+
{"epoch": 19, "step": 7021, "loss": 1.0722795724868774, "total_norm": 2.16276216506958}
|
| 7057 |
+
{"epoch": 19, "step": 7022, "loss": 0.9276126027107239, "total_norm": 2.26115083694458}
|
| 7058 |
+
{"epoch": 19, "step": 7023, "loss": 0.9316175580024719, "total_norm": 1.8462505340576172}
|
| 7059 |
+
{"epoch": 19, "step": 7024, "loss": 1.0301700830459595, "total_norm": 2.0485646724700928}
|
| 7060 |
+
{"epoch": 19, "step": 7025, "loss": 0.896314263343811, "total_norm": 2.5268728733062744}
|
| 7061 |
+
{"epoch": 19, "step": 7026, "loss": 0.9199168086051941, "total_norm": 2.4221689701080322}
|
| 7062 |
+
{"epoch": 19, "step": 7027, "loss": 0.9362953901290894, "total_norm": 2.1364996433258057}
|
| 7063 |
+
{"epoch": 19, "step": 7028, "loss": 1.0316636562347412, "total_norm": 2.7341268062591553}
|
| 7064 |
+
{"epoch": 19, "step": 7029, "loss": 1.0331467390060425, "total_norm": 2.2750051021575928}
|
| 7065 |
+
{"epoch": 19, "step": 7030, "loss": 0.9868753552436829, "total_norm": 2.299497365951538}
|
| 7066 |
+
{"epoch": 19, "step": 7031, "loss": 1.001501441001892, "total_norm": 2.011687755584717}
|
| 7067 |
+
{"epoch": 19, "step": 7032, "loss": 1.0627403259277344, "total_norm": 2.494314432144165}
|
| 7068 |
+
{"epoch": 19, "step": 7033, "loss": 0.732688844203949, "total_norm": 2.423496723175049}
|
| 7069 |
+
{"epoch": 19, "step": 7034, "loss": 1.011303186416626, "total_norm": 2.5379226207733154}
|
| 7070 |
+
{"epoch": 19, "step": 7035, "loss": 0.9697454571723938, "total_norm": 2.383070468902588}
|
| 7071 |
+
{"epoch": 19, "step": 7036, "loss": 0.9401509165763855, "total_norm": 2.020714282989502}
|
| 7072 |
+
{"epoch": 19, "step": 7037, "loss": 1.0511904954910278, "total_norm": 2.134523868560791}
|
| 7073 |
+
{"epoch": 19, "step": 7038, "loss": 0.9834908843040466, "total_norm": 2.0019540786743164}
|
| 7074 |
+
{"epoch": 19, "step": 7039, "loss": 1.022826075553894, "total_norm": 2.4211764335632324}
|
| 7075 |
+
{"epoch": 19, "step": 7040, "loss": 1.0272319316864014, "total_norm": 2.7690317630767822}
|
| 7076 |
+
{"epoch": 19, "step": 7041, "loss": 1.0156784057617188, "total_norm": 1.8849399089813232}
|
| 7077 |
+
{"epoch": 19, "step": 7042, "loss": 0.9440536499023438, "total_norm": 2.2934746742248535}
|
| 7078 |
+
{"epoch": 19, "step": 7043, "loss": 0.96610426902771, "total_norm": 2.234178066253662}
|
| 7079 |
+
{"epoch": 19, "step": 7044, "loss": 0.8899967670440674, "total_norm": 2.175753593444824}
|
| 7080 |
+
{"epoch": 19, "step": 7045, "loss": 0.8272766470909119, "total_norm": 2.2437081336975098}
|
| 7081 |
+
{"epoch": 19, "step": 7046, "loss": 0.9929255843162537, "total_norm": 2.3534531593322754}
|
| 7082 |
+
{"epoch": 19, "step": 7047, "loss": 0.7814865708351135, "total_norm": 2.4201340675354004}
|
| 7083 |
+
{"epoch": 19, "step": 7048, "loss": 0.8121892809867859, "total_norm": 2.1169636249542236}
|
| 7084 |
+
{"epoch": 19, "step": 7049, "loss": 1.0558141469955444, "total_norm": 2.0264806747436523}
|
| 7085 |
+
{"epoch": 19, "step": 7050, "loss": 0.9145981073379517, "total_norm": 2.293095588684082}
|
| 7086 |
+
{"epoch": 19, "step": 7051, "loss": 0.8409807085990906, "total_norm": 2.253660202026367}
|
| 7087 |
+
{"epoch": 19, "step": 7052, "loss": 0.8930720686912537, "total_norm": 1.9313265085220337}
|
| 7088 |
+
{"epoch": 19, "step": 7053, "loss": 0.8538739085197449, "total_norm": 2.044586420059204}
|
| 7089 |
+
{"epoch": 19, "step": 7054, "loss": 0.8549558520317078, "total_norm": 2.0862460136413574}
|
| 7090 |
+
{"epoch": 19, "step": 7055, "loss": 0.9616589546203613, "total_norm": 2.0946474075317383}
|
| 7091 |
+
{"epoch": 19, "step": 7056, "loss": 1.0670510530471802, "total_norm": 2.1236109733581543}
|
| 7092 |
+
{"epoch": 19, "step": 7057, "loss": 0.9218481779098511, "total_norm": 2.0605156421661377}
|
| 7093 |
+
{"epoch": 19, "step": 7058, "loss": 0.9689083099365234, "total_norm": 1.9805926084518433}
|
| 7094 |
+
{"epoch": 19, "step": 7059, "loss": 0.8964155316352844, "total_norm": 2.216573715209961}
|
| 7095 |
+
{"epoch": 19, "step": 7060, "loss": 1.0813244581222534, "total_norm": 2.400559902191162}
|
| 7096 |
+
{"epoch": 19, "step": 7061, "loss": 1.0182946920394897, "total_norm": 2.009669065475464}
|
| 7097 |
+
{"epoch": 19, "step": 7062, "loss": 0.9501098394393921, "total_norm": 2.2862977981567383}
|
| 7098 |
+
{"epoch": 19, "step": 7063, "loss": 1.036272644996643, "total_norm": 2.0067906379699707}
|
| 7099 |
+
{"epoch": 19, "step": 7064, "loss": 0.9323742985725403, "total_norm": 2.250373363494873}
|
| 7100 |
+
{"epoch": 19, "step": 7065, "loss": 0.9177893996238708, "total_norm": 2.358621835708618}
|
| 7101 |
+
{"epoch": 19, "step": 7066, "loss": 0.9968000650405884, "total_norm": 2.4013137817382812}
|
| 7102 |
+
{"epoch": 19, "step": 7067, "loss": 0.7978839874267578, "total_norm": 1.8765183687210083}
|
| 7103 |
+
{"epoch": 19, "step": 7068, "loss": 0.8304702043533325, "total_norm": 2.0758557319641113}
|
| 7104 |
+
{"epoch": 19, "step": 7069, "loss": 1.0406835079193115, "total_norm": 2.2935354709625244}
|
| 7105 |
+
{"epoch": 19, "step": 7070, "loss": 0.9664809703826904, "total_norm": 2.066673517227173}
|
| 7106 |
+
{"epoch": 19, "step": 7071, "loss": 0.8273698687553406, "total_norm": 2.0826475620269775}
|
| 7107 |
+
{"epoch": 19, "step": 7072, "loss": 0.8763972520828247, "total_norm": 1.8392996788024902}
|
| 7108 |
+
{"epoch": 19, "step": 7073, "loss": 0.7914746999740601, "total_norm": 2.443016767501831}
|
| 7109 |
+
{"epoch": 19, "step": 7074, "loss": 1.0193135738372803, "total_norm": 2.1009511947631836}
|
| 7110 |
+
{"epoch": 19, "step": 7075, "loss": 1.0108946561813354, "total_norm": 2.048048734664917}
|
| 7111 |
+
{"epoch": 19, "step": 7076, "loss": 1.0598605871200562, "total_norm": 2.532162666320801}
|
| 7112 |
+
{"epoch": 19, "step": 7077, "loss": 0.92177414894104, "total_norm": 2.367065906524658}
|
| 7113 |
+
{"epoch": 19, "step": 7078, "loss": 0.9204901456832886, "total_norm": 2.3489468097686768}
|
| 7114 |
+
{"epoch": 19, "step": 7079, "loss": 0.9397857785224915, "total_norm": 2.301786184310913}
|
| 7115 |
+
{"epoch": 19, "step": 7080, "loss": 0.9959754943847656, "total_norm": 2.4565441608428955}
|
| 7116 |
+
{"epoch": 19, "step": 7081, "loss": 0.8819459080696106, "total_norm": 2.1640584468841553}
|
| 7117 |
+
{"epoch": 19, "step": 7082, "loss": 1.0106791257858276, "total_norm": 2.2324161529541016}
|
| 7118 |
+
{"epoch": 19, "step": 7083, "loss": 0.8796620965003967, "total_norm": 2.1432862281799316}
|
| 7119 |
+
{"epoch": 19, "step": 7084, "loss": 1.0346994400024414, "total_norm": 2.334195137023926}
|
| 7120 |
+
{"epoch": 19, "step": 7085, "loss": 0.8665070533752441, "total_norm": 2.455230474472046}
|
| 7121 |
+
{"epoch": 19, "step": 7086, "loss": 0.8724362254142761, "total_norm": 2.190770387649536}
|
| 7122 |
+
{"epoch": 19, "step": 7087, "loss": 0.9135345816612244, "total_norm": 2.0580408573150635}
|
| 7123 |
+
{"epoch": 19, "step": 7088, "loss": 1.0645614862442017, "total_norm": 2.1385843753814697}
|
| 7124 |
+
{"epoch": 19, "step": 7089, "loss": 0.8499649167060852, "total_norm": 2.1588597297668457}
|
| 7125 |
+
{"epoch": 19, "step": 7090, "loss": 0.904139518737793, "total_norm": 2.2303953170776367}
|
| 7126 |
+
{"epoch": 19, "step": 7091, "loss": 0.9921481609344482, "total_norm": 2.140209674835205}
|
| 7127 |
+
{"epoch": 19, "step": 7092, "loss": 0.7803687453269958, "total_norm": 2.0233328342437744}
|
| 7128 |
+
{"epoch": 19, "step": 7093, "loss": 0.8485361337661743, "total_norm": 2.265378952026367}
|
| 7129 |
+
{"epoch": 19, "step": 7094, "loss": 0.8841637969017029, "total_norm": 1.7431678771972656}
|
| 7130 |
+
{"epoch": 19, "step": 7095, "loss": 1.0644142627716064, "total_norm": 2.0698134899139404}
|
| 7131 |
+
{"epoch": 19, "step": 7096, "loss": 0.7221455574035645, "total_norm": 1.967576503753662}
|
| 7132 |
+
{"epoch": 19, "step": 7097, "loss": 0.903188943862915, "total_norm": 2.069768190383911}
|
| 7133 |
+
{"epoch": 19, "step": 7098, "loss": 0.6667285561561584, "total_norm": 2.4725494384765625}
|
| 7134 |
+
{"epoch": 19, "step": 7099, "loss": 1.0064854621887207, "total_norm": 2.176344156265259}
|
| 7135 |
+
{"epoch": 19, "step": 7100, "loss": 1.055530309677124, "total_norm": 2.0044915676116943}
|
| 7136 |
+
{"epoch": 19, "step": 7101, "loss": 0.9249635338783264, "total_norm": 2.6610918045043945}
|
| 7137 |
+
{"epoch": 19, "step": 7102, "loss": 1.1764768362045288, "total_norm": 2.390986204147339}
|
| 7138 |
+
{"epoch": 19, "step": 7103, "loss": 0.9926992654800415, "total_norm": 2.0740933418273926}
|
| 7139 |
+
{"epoch": 19, "step": 7104, "loss": 0.9333260655403137, "total_norm": 1.9224601984024048}
|
| 7140 |
+
{"epoch": 19, "step": 7105, "loss": 0.9996719360351562, "total_norm": 1.9267510175704956}
|
| 7141 |
+
{"epoch": 19, "step": 7106, "loss": 0.8253880739212036, "total_norm": 2.0084424018859863}
|
| 7142 |
+
{"epoch": 19, "step": 7107, "loss": 0.988351047039032, "total_norm": 1.7980711460113525}
|
| 7143 |
+
{"epoch": 19, "step": 7108, "loss": 0.7937043309211731, "total_norm": 2.189807415008545}
|
| 7144 |
+
{"epoch": 19, "step": 7109, "loss": 0.983729362487793, "total_norm": 2.1599135398864746}
|
| 7145 |
+
{"epoch": 19, "step": 7110, "loss": 1.0713106393814087, "total_norm": 2.1843087673187256}
|
| 7146 |
+
{"epoch": 19, "step": 7111, "loss": 0.8900110125541687, "total_norm": 1.912916898727417}
|
| 7147 |
+
{"epoch": 19, "step": 7112, "loss": 0.9128661751747131, "total_norm": 2.0890517234802246}
|
| 7148 |
+
{"epoch": 19, "step": 7113, "loss": 0.9951633810997009, "total_norm": 1.8762894868850708}
|
| 7149 |
+
{"epoch": 19, "step": 7114, "loss": 1.1023377180099487, "total_norm": 2.1743130683898926}
|
| 7150 |
+
{"epoch": 19, "step": 7115, "loss": 1.052733063697815, "total_norm": 2.4147393703460693}
|
| 7151 |
+
{"epoch": 19, "step": 7116, "loss": 1.0395351648330688, "total_norm": 2.0744309425354004}
|
| 7152 |
+
{"epoch": 19, "step": 7117, "loss": 1.001498818397522, "total_norm": 2.1239731311798096}
|
| 7153 |
+
{"epoch": 19, "step": 7118, "loss": 0.9171522855758667, "total_norm": 2.066336154937744}
|
| 7154 |
+
{"epoch": 19, "step": 7119, "loss": 1.0056990385055542, "total_norm": 2.164456844329834}
|
| 7155 |
+
{"epoch": 19, "step": 7120, "loss": 1.1240602731704712, "total_norm": 2.1806423664093018}
|
| 7156 |
+
{"epoch": 19, "step": 7121, "loss": 0.9927192330360413, "total_norm": 2.271285057067871}
|
| 7157 |
+
{"epoch": 19, "step": 7122, "loss": 1.053239345550537, "total_norm": 2.3571248054504395}
|
| 7158 |
+
{"epoch": 19, "step": 7123, "loss": 0.960978090763092, "total_norm": 2.210730791091919}
|
| 7159 |
+
{"epoch": 19, "step": 7124, "loss": 0.9512543678283691, "total_norm": 2.2578303813934326}
|
| 7160 |
+
{"epoch": 19, "step": 7125, "loss": 0.823019802570343, "total_norm": 2.2518928050994873}
|
| 7161 |
+
{"epoch": 19, "step": 7126, "loss": 1.0185447931289673, "total_norm": 2.2135746479034424}
|
| 7162 |
+
{"epoch": 19, "step": 7127, "loss": 1.0319362878799438, "total_norm": 2.3526275157928467}
|
| 7163 |
+
{"epoch": 19, "step": 7128, "loss": 0.9773253798484802, "total_norm": 2.605573892593384}
|
| 7164 |
+
{"epoch": 19, "step": 7129, "loss": 0.9177132248878479, "total_norm": 2.248727321624756}
|
| 7165 |
+
{"epoch": 19, "step": 7130, "loss": 0.7528162002563477, "total_norm": 2.1526403427124023}
|
| 7166 |
+
{"epoch": 19, "step": 7131, "loss": 1.0723429918289185, "total_norm": 2.434816837310791}
|
| 7167 |
+
{"epoch": 19, "step": 7132, "loss": 0.9947962164878845, "total_norm": 2.0023837089538574}
|
| 7168 |
+
{"epoch": 19, "step": 7133, "loss": 0.9768377542495728, "total_norm": 1.8975567817687988}
|
| 7169 |
+
{"epoch": 19, "step": 7134, "loss": 1.0053088665008545, "total_norm": 1.765081524848938}
|
| 7170 |
+
{"epoch": 19, "step": 7135, "loss": 1.0228360891342163, "total_norm": 1.9960452318191528}
|
| 7171 |
+
{"epoch": 19, "step": 7136, "loss": 0.855567991733551, "total_norm": 1.9088129997253418}
|
| 7172 |
+
{"epoch": 19, "step": 7137, "loss": 0.950056254863739, "total_norm": 2.3212618827819824}
|
| 7173 |
+
{"epoch": 19, "step": 7138, "loss": 0.9388453364372253, "total_norm": 2.1661605834960938}
|
| 7174 |
+
{"epoch": 19, "step": 7139, "loss": 0.9138604998588562, "total_norm": 2.063908576965332}
|
| 7175 |
+
{"epoch": 19, "step": 7140, "loss": 0.9172233939170837, "total_norm": 2.2232890129089355}
|
| 7176 |
+
{"epoch": 19, "step": 7141, "loss": 0.9988603591918945, "total_norm": 2.0956342220306396}
|
| 7177 |
+
{"epoch": 19, "step": 7142, "loss": 1.054807424545288, "total_norm": 2.0783443450927734}
|
| 7178 |
+
{"epoch": 19, "step": 7143, "loss": 0.92840576171875, "total_norm": 4.907958984375}
|
| 7179 |
+
{"epoch": 19, "step": 7144, "loss": 0.8940110206604004, "total_norm": 2.4722609519958496}
|
| 7180 |
+
{"epoch": 19, "step": 7145, "loss": 1.0248395204544067, "total_norm": 2.2408180236816406}
|
| 7181 |
+
{"epoch": 19, "step": 7146, "loss": 0.9071243405342102, "total_norm": 2.1310484409332275}
|
| 7182 |
+
{"epoch": 19, "step": 7147, "loss": 0.9536292552947998, "total_norm": 1.9045510292053223}
|
| 7183 |
+
{"epoch": 19, "step": 7148, "loss": 0.9300982356071472, "total_norm": 2.0717697143554688}
|
| 7184 |
+
{"epoch": 19, "step": 7149, "loss": 0.9476808905601501, "total_norm": 1.9408111572265625}
|
| 7185 |
+
{"epoch": 19, "step": 7150, "loss": 1.059009075164795, "total_norm": 2.23828125}
|
| 7186 |
+
{"epoch": 19, "step": 7151, "loss": 1.0218875408172607, "total_norm": 2.5802628993988037}
|
| 7187 |
+
{"epoch": 19, "step": 7152, "loss": 0.8046242594718933, "total_norm": 2.042820930480957}
|
| 7188 |
+
{"epoch": 19, "step": 7153, "loss": 1.047874093055725, "total_norm": 2.604940414428711}
|
| 7189 |
+
{"epoch": 19, "step": 7154, "loss": 0.9886680245399475, "total_norm": 2.090238571166992}
|
| 7190 |
+
{"epoch": 19, "step": 7155, "loss": 0.7961331009864807, "total_norm": 1.9170571565628052}
|
| 7191 |
+
{"epoch": 19, "step": 7156, "loss": 0.9734504818916321, "total_norm": 2.173192262649536}
|
| 7192 |
+
{"epoch": 19, "step": 7157, "loss": 1.0776722431182861, "total_norm": 2.3109467029571533}
|
| 7193 |
+
{"epoch": 19, "step": 7158, "loss": 1.0899198055267334, "total_norm": 2.5292632579803467}
|
| 7194 |
+
{"epoch": 19, "step": 7159, "loss": 0.9745498895645142, "total_norm": 4.019927978515625}
|
| 7195 |
+
{"epoch": 19, "step": 7160, "eval_loss": 3.5230754017829895, "eval_rougeL": 0.11275351405322884}
|