Đào Quốc Tuấn committed on
Commit
682916a
·
verified ·
1 Parent(s): b06b4a5

Upload folder using huggingface_hub

Browse files
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "dtype": "float32",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "transformers_version": "4.56.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.56.0"
6
+ }
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df9536b2d082750fabcb08785615b55be371e90feff36c25893bbf29d988cd4f
3
+ size 497774208
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
experiments/sft_gpt2-120m/20251117_210625/checkpoints/epoch_19/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251117_210625/sft_gpt2-120m-1.log CHANGED
@@ -904,3 +904,41 @@
904
  2025-11-17 22:09:17,820 - absl - INFO - Using default tokenizer.
905
  2025-11-17 22:09:19,667 - root - INFO - Step 13601/14300 train rougeL: 0.15701778235940564
906
  2025-11-17 22:09:19,753 - root - INFO - Step 13601/14300 loss: 1.5197197198867798, total_norm: 2.724116086959839
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
904
  2025-11-17 22:09:17,820 - absl - INFO - Using default tokenizer.
905
  2025-11-17 22:09:19,667 - root - INFO - Step 13601/14300 train rougeL: 0.15701778235940564
906
  2025-11-17 22:09:19,753 - root - INFO - Step 13601/14300 loss: 1.5197197198867798, total_norm: 2.724116086959839
907
+ 2025-11-17 22:09:32,963 - absl - INFO - Using default tokenizer.
908
+ 2025-11-17 22:09:34,768 - root - INFO - Step 13701/14300 train rougeL: 0.11366732069026135
909
+ 2025-11-17 22:09:34,853 - root - INFO - Step 13701/14300 loss: 1.4202815294265747, total_norm: 2.931837320327759
910
+ 2025-11-17 22:09:48,013 - absl - INFO - Using default tokenizer.
911
+ 2025-11-17 22:09:49,836 - root - INFO - Step 13801/14300 train rougeL: 0.0994162201921235
912
+ 2025-11-17 22:09:49,921 - root - INFO - Step 13801/14300 loss: 1.4564669132232666, total_norm: 3.5156350135803223
913
+ 2025-11-17 22:10:03,058 - absl - INFO - Using default tokenizer.
914
+ 2025-11-17 22:10:04,849 - root - INFO - Step 13901/14300 train rougeL: 0.10846804422799555
915
+ 2025-11-17 22:10:04,935 - root - INFO - Step 13901/14300 loss: 1.422178030014038, total_norm: 5.304150581359863
916
+ 2025-11-17 22:10:18,041 - root - INFO - Step 14001/14300 finished
917
+ 2025-11-17 22:10:18,368 - absl - INFO - Using default tokenizer.
918
+ 2025-11-17 22:10:22,949 - absl - INFO - Using default tokenizer.
919
+ 2025-11-17 22:10:27,422 - absl - INFO - Using default tokenizer.
920
+ 2025-11-17 22:10:31,913 - absl - INFO - Using default tokenizer.
921
+ 2025-11-17 22:10:36,472 - absl - INFO - Using default tokenizer.
922
+ 2025-11-17 22:10:40,963 - absl - INFO - Using default tokenizer.
923
+ 2025-11-17 22:10:45,442 - absl - INFO - Using default tokenizer.
924
+ 2025-11-17 22:10:49,870 - absl - INFO - Using default tokenizer.
925
+ 2025-11-17 22:10:53,426 - root - INFO - Epoch 20/20 eval loss: 3.1131076514720917, eval rougeL: 0.11258248826184435
926
+ 2025-11-17 22:10:53,474 - absl - INFO - Using default tokenizer.
927
+ 2025-11-17 22:10:55,280 - root - INFO - Step 14001/14300 train rougeL: 0.12588734530162038
928
+ 2025-11-17 22:10:55,366 - root - INFO - Step 14001/14300 loss: 1.4486746788024902, total_norm: 3.31862473487854
929
+ 2025-11-17 22:11:08,516 - absl - INFO - Using default tokenizer.
930
+ 2025-11-17 22:11:10,311 - root - INFO - Step 14101/14300 train rougeL: 0.15127445141006263
931
+ 2025-11-17 22:11:10,396 - root - INFO - Step 14101/14300 loss: 1.3603423833847046, total_norm: 3.2848446369171143
932
+ 2025-11-17 22:11:23,539 - absl - INFO - Using default tokenizer.
933
+ 2025-11-17 22:11:25,348 - root - INFO - Step 14201/14300 train rougeL: 0.1432487511020824
934
+ 2025-11-17 22:11:25,434 - root - INFO - Step 14201/14300 loss: 1.3456073999404907, total_norm: 3.573255777359009
935
+ 2025-11-17 22:11:38,517 - root - INFO - Epoch 20/20 finished
936
+ 2025-11-17 22:11:38,844 - absl - INFO - Using default tokenizer.
937
+ 2025-11-17 22:11:43,744 - absl - INFO - Using default tokenizer.
938
+ 2025-11-17 22:11:48,268 - absl - INFO - Using default tokenizer.
939
+ 2025-11-17 22:11:52,828 - absl - INFO - Using default tokenizer.
940
+ 2025-11-17 22:11:57,474 - absl - INFO - Using default tokenizer.
941
+ 2025-11-17 22:12:01,970 - absl - INFO - Using default tokenizer.
942
+ 2025-11-17 22:12:06,417 - absl - INFO - Using default tokenizer.
943
+ 2025-11-17 22:12:10,884 - absl - INFO - Using default tokenizer.
944
+ 2025-11-17 22:12:14,442 - root - INFO - Epoch 20/20 eval loss: 3.112957239151001, eval rougeL: 0.10973326032172814
experiments/sft_gpt2-120m/20251117_210625/sft_gpt2-120m-1_metrics.jsonl CHANGED
@@ -13745,3 +13745,605 @@
13745
  {"epoch": 19, "step": 13697, "loss": 1.4697589874267578, "total_norm": 3.133631467819214}
13746
  {"epoch": 19, "step": 13698, "loss": 1.5667927265167236, "total_norm": 3.9558358192443848}
13747
  {"epoch": 19, "step": 13699, "loss": 1.4557989835739136, "total_norm": 4.509970664978027}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13745
  {"epoch": 19, "step": 13697, "loss": 1.4697589874267578, "total_norm": 3.133631467819214}
13746
  {"epoch": 19, "step": 13698, "loss": 1.5667927265167236, "total_norm": 3.9558358192443848}
13747
  {"epoch": 19, "step": 13699, "loss": 1.4557989835739136, "total_norm": 4.509970664978027}
13748
+ {"epoch": 19, "step": 13700, "loss": 1.4202815294265747, "total_norm": 2.931837320327759}
13749
+ {"epoch": 19, "step": 13701, "loss": 1.2778759002685547, "total_norm": 2.9312334060668945}
13750
+ {"epoch": 19, "step": 13702, "loss": 1.441731333732605, "total_norm": 2.8377246856689453}
13751
+ {"epoch": 19, "step": 13703, "loss": 1.2610598802566528, "total_norm": 4.324733257293701}
13752
+ {"epoch": 19, "step": 13704, "loss": 1.215844750404358, "total_norm": 4.137703895568848}
13753
+ {"epoch": 19, "step": 13705, "loss": 1.3055145740509033, "total_norm": 4.699143409729004}
13754
+ {"epoch": 19, "step": 13706, "loss": 1.3977113962173462, "total_norm": 3.498288869857788}
13755
+ {"epoch": 19, "step": 13707, "loss": 1.2223742008209229, "total_norm": 3.7560906410217285}
13756
+ {"epoch": 19, "step": 13708, "loss": 1.1908674240112305, "total_norm": 4.433256149291992}
13757
+ {"epoch": 19, "step": 13709, "loss": 0.9903541803359985, "total_norm": 3.6701900959014893}
13758
+ {"epoch": 19, "step": 13710, "loss": 1.4382474422454834, "total_norm": 3.9354238510131836}
13759
+ {"epoch": 19, "step": 13711, "loss": 1.2640063762664795, "total_norm": 3.342405319213867}
13760
+ {"epoch": 19, "step": 13712, "loss": 1.4471933841705322, "total_norm": 3.1612296104431152}
13761
+ {"epoch": 19, "step": 13713, "loss": 1.7206968069076538, "total_norm": 3.7707369327545166}
13762
+ {"epoch": 19, "step": 13714, "loss": 1.4971662759780884, "total_norm": 3.9654316902160645}
13763
+ {"epoch": 19, "step": 13715, "loss": 1.3122522830963135, "total_norm": 3.911064863204956}
13764
+ {"epoch": 19, "step": 13716, "loss": 1.5764912366867065, "total_norm": 4.184034824371338}
13765
+ {"epoch": 19, "step": 13717, "loss": 1.5999218225479126, "total_norm": 3.430708885192871}
13766
+ {"epoch": 19, "step": 13718, "loss": 1.2970463037490845, "total_norm": 3.5060408115386963}
13767
+ {"epoch": 19, "step": 13719, "loss": 1.2135618925094604, "total_norm": 3.313222885131836}
13768
+ {"epoch": 19, "step": 13720, "loss": 1.3143032789230347, "total_norm": 3.129410743713379}
13769
+ {"epoch": 19, "step": 13721, "loss": 1.6006815433502197, "total_norm": 3.568342924118042}
13770
+ {"epoch": 19, "step": 13722, "loss": 1.561439037322998, "total_norm": 3.1541624069213867}
13771
+ {"epoch": 19, "step": 13723, "loss": 1.0635932683944702, "total_norm": 3.659534454345703}
13772
+ {"epoch": 19, "step": 13724, "loss": 1.3365991115570068, "total_norm": 3.8562846183776855}
13773
+ {"epoch": 19, "step": 13725, "loss": 1.6040537357330322, "total_norm": 3.2983391284942627}
13774
+ {"epoch": 19, "step": 13726, "loss": 1.6450145244598389, "total_norm": 3.039081573486328}
13775
+ {"epoch": 19, "step": 13727, "loss": 1.5556824207305908, "total_norm": 4.340780735015869}
13776
+ {"epoch": 19, "step": 13728, "loss": 1.2935829162597656, "total_norm": 3.1901602745056152}
13777
+ {"epoch": 19, "step": 13729, "loss": 1.1055216789245605, "total_norm": 3.711310625076294}
13778
+ {"epoch": 19, "step": 13730, "loss": 1.2778816223144531, "total_norm": 4.001725673675537}
13779
+ {"epoch": 19, "step": 13731, "loss": 1.5406831502914429, "total_norm": 3.1844496726989746}
13780
+ {"epoch": 19, "step": 13732, "loss": 1.5421829223632812, "total_norm": 4.003876686096191}
13781
+ {"epoch": 19, "step": 13733, "loss": 1.3820418119430542, "total_norm": 4.243008613586426}
13782
+ {"epoch": 19, "step": 13734, "loss": 1.4343942403793335, "total_norm": 3.431849241256714}
13783
+ {"epoch": 19, "step": 13735, "loss": 0.9464629888534546, "total_norm": 3.0899643898010254}
13784
+ {"epoch": 19, "step": 13736, "loss": 1.276984691619873, "total_norm": 3.6651976108551025}
13785
+ {"epoch": 19, "step": 13737, "loss": 1.564530611038208, "total_norm": 3.126093626022339}
13786
+ {"epoch": 19, "step": 13738, "loss": 1.4991304874420166, "total_norm": 3.306224822998047}
13787
+ {"epoch": 19, "step": 13739, "loss": 1.249772310256958, "total_norm": 3.589247226715088}
13788
+ {"epoch": 19, "step": 13740, "loss": 1.6168543100357056, "total_norm": 2.727227210998535}
13789
+ {"epoch": 19, "step": 13741, "loss": 1.310288667678833, "total_norm": 3.733527898788452}
13790
+ {"epoch": 19, "step": 13742, "loss": 1.1505173444747925, "total_norm": 4.16650915145874}
13791
+ {"epoch": 19, "step": 13743, "loss": 1.7538164854049683, "total_norm": 3.3618757724761963}
13792
+ {"epoch": 19, "step": 13744, "loss": 1.5400453805923462, "total_norm": 2.960296869277954}
13793
+ {"epoch": 19, "step": 13745, "loss": 1.7074369192123413, "total_norm": 3.883607864379883}
13794
+ {"epoch": 19, "step": 13746, "loss": 1.183535099029541, "total_norm": 5.011446475982666}
13795
+ {"epoch": 19, "step": 13747, "loss": 1.1250078678131104, "total_norm": 3.2022228240966797}
13796
+ {"epoch": 19, "step": 13748, "loss": 1.0054105520248413, "total_norm": 4.066659450531006}
13797
+ {"epoch": 19, "step": 13749, "loss": 0.9753122925758362, "total_norm": 3.555372953414917}
13798
+ {"epoch": 19, "step": 13750, "loss": 1.5177030563354492, "total_norm": 4.554347038269043}
13799
+ {"epoch": 19, "step": 13751, "loss": 1.7505154609680176, "total_norm": 2.863085985183716}
13800
+ {"epoch": 19, "step": 13752, "loss": 1.1557446718215942, "total_norm": 3.7106857299804688}
13801
+ {"epoch": 19, "step": 13753, "loss": 1.489228367805481, "total_norm": 3.3654918670654297}
13802
+ {"epoch": 19, "step": 13754, "loss": 1.3363553285598755, "total_norm": 3.9609715938568115}
13803
+ {"epoch": 19, "step": 13755, "loss": 1.4819660186767578, "total_norm": 3.8499929904937744}
13804
+ {"epoch": 19, "step": 13756, "loss": 1.4305007457733154, "total_norm": 3.127190589904785}
13805
+ {"epoch": 19, "step": 13757, "loss": 0.8755711913108826, "total_norm": 3.6384356021881104}
13806
+ {"epoch": 19, "step": 13758, "loss": 1.3307551145553589, "total_norm": 4.930772304534912}
13807
+ {"epoch": 19, "step": 13759, "loss": 1.1667275428771973, "total_norm": 3.191586494445801}
13808
+ {"epoch": 19, "step": 13760, "loss": 1.1692935228347778, "total_norm": 3.903918981552124}
13809
+ {"epoch": 19, "step": 13761, "loss": 1.0943982601165771, "total_norm": 3.665388584136963}
13810
+ {"epoch": 19, "step": 13762, "loss": 1.496422290802002, "total_norm": 3.350482940673828}
13811
+ {"epoch": 19, "step": 13763, "loss": 1.2831674814224243, "total_norm": 3.918605089187622}
13812
+ {"epoch": 19, "step": 13764, "loss": 1.4779267311096191, "total_norm": 3.493680715560913}
13813
+ {"epoch": 19, "step": 13765, "loss": 1.5711647272109985, "total_norm": 3.9628729820251465}
13814
+ {"epoch": 19, "step": 13766, "loss": 1.4377470016479492, "total_norm": 4.863666534423828}
13815
+ {"epoch": 19, "step": 13767, "loss": 1.3650213479995728, "total_norm": 4.043210983276367}
13816
+ {"epoch": 19, "step": 13768, "loss": 1.5447533130645752, "total_norm": 3.284945011138916}
13817
+ {"epoch": 19, "step": 13769, "loss": 1.397149920463562, "total_norm": 3.5370569229125977}
13818
+ {"epoch": 19, "step": 13770, "loss": 1.540711522102356, "total_norm": 3.3004181385040283}
13819
+ {"epoch": 19, "step": 13771, "loss": 1.2693753242492676, "total_norm": 4.840665817260742}
13820
+ {"epoch": 19, "step": 13772, "loss": 1.485548496246338, "total_norm": 3.1029722690582275}
13821
+ {"epoch": 19, "step": 13773, "loss": 1.12223219871521, "total_norm": 4.045623302459717}
13822
+ {"epoch": 19, "step": 13774, "loss": 1.3784133195877075, "total_norm": 3.068326473236084}
13823
+ {"epoch": 19, "step": 13775, "loss": 1.3055189847946167, "total_norm": 3.379514694213867}
13824
+ {"epoch": 19, "step": 13776, "loss": 1.0138485431671143, "total_norm": 2.8218653202056885}
13825
+ {"epoch": 19, "step": 13777, "loss": 1.6893198490142822, "total_norm": 2.94746470451355}
13826
+ {"epoch": 19, "step": 13778, "loss": 1.1289879083633423, "total_norm": 3.245089530944824}
13827
+ {"epoch": 19, "step": 13779, "loss": 1.3741838932037354, "total_norm": 3.9876036643981934}
13828
+ {"epoch": 19, "step": 13780, "loss": 1.1878161430358887, "total_norm": 3.711336612701416}
13829
+ {"epoch": 19, "step": 13781, "loss": 1.24175226688385, "total_norm": 3.512591600418091}
13830
+ {"epoch": 19, "step": 13782, "loss": 1.5526046752929688, "total_norm": 3.7270100116729736}
13831
+ {"epoch": 19, "step": 13783, "loss": 1.4932096004486084, "total_norm": 3.3809545040130615}
13832
+ {"epoch": 19, "step": 13784, "loss": 1.4487375020980835, "total_norm": 3.348440647125244}
13833
+ {"epoch": 19, "step": 13785, "loss": 1.0273642539978027, "total_norm": 3.283909559249878}
13834
+ {"epoch": 19, "step": 13786, "loss": 1.4747172594070435, "total_norm": 2.330280065536499}
13835
+ {"epoch": 19, "step": 13787, "loss": 1.263898491859436, "total_norm": 3.2796905040740967}
13836
+ {"epoch": 19, "step": 13788, "loss": 1.451947569847107, "total_norm": 3.46799898147583}
13837
+ {"epoch": 19, "step": 13789, "loss": 1.3231505155563354, "total_norm": 3.7785236835479736}
13838
+ {"epoch": 19, "step": 13790, "loss": 1.546904444694519, "total_norm": 3.1990675926208496}
13839
+ {"epoch": 19, "step": 13791, "loss": 1.2200193405151367, "total_norm": 3.792541742324829}
13840
+ {"epoch": 19, "step": 13792, "loss": 1.3462004661560059, "total_norm": 3.532633066177368}
13841
+ {"epoch": 19, "step": 13793, "loss": 1.4276671409606934, "total_norm": 2.904848098754883}
13842
+ {"epoch": 19, "step": 13794, "loss": 1.7283473014831543, "total_norm": 3.7444827556610107}
13843
+ {"epoch": 19, "step": 13795, "loss": 1.2615970373153687, "total_norm": 2.582883596420288}
13844
+ {"epoch": 19, "step": 13796, "loss": 1.0325043201446533, "total_norm": 4.100792407989502}
13845
+ {"epoch": 19, "step": 13797, "loss": 1.252190113067627, "total_norm": 3.315912961959839}
13846
+ {"epoch": 19, "step": 13798, "loss": 1.307160496711731, "total_norm": 3.7112529277801514}
13847
+ {"epoch": 19, "step": 13799, "loss": 1.500766634941101, "total_norm": 3.0171751976013184}
13848
+ {"epoch": 19, "step": 13800, "loss": 1.4564669132232666, "total_norm": 3.5156350135803223}
13849
+ {"epoch": 19, "step": 13801, "loss": 1.5407418012619019, "total_norm": 3.02687931060791}
13850
+ {"epoch": 19, "step": 13802, "loss": 1.198734998703003, "total_norm": 3.1149706840515137}
13851
+ {"epoch": 19, "step": 13803, "loss": 1.1312769651412964, "total_norm": 3.5085158348083496}
13852
+ {"epoch": 19, "step": 13804, "loss": 0.9562968015670776, "total_norm": 4.590383052825928}
13853
+ {"epoch": 19, "step": 13805, "loss": 1.4128172397613525, "total_norm": 4.326806545257568}
13854
+ {"epoch": 19, "step": 13806, "loss": 1.4068026542663574, "total_norm": 3.1286139488220215}
13855
+ {"epoch": 19, "step": 13807, "loss": 1.0060595273971558, "total_norm": 3.2087011337280273}
13856
+ {"epoch": 19, "step": 13808, "loss": 1.4903991222381592, "total_norm": 3.1580231189727783}
13857
+ {"epoch": 19, "step": 13809, "loss": 1.337005853652954, "total_norm": 3.874448299407959}
13858
+ {"epoch": 19, "step": 13810, "loss": 1.4993717670440674, "total_norm": 3.9942331314086914}
13859
+ {"epoch": 19, "step": 13811, "loss": 1.3318547010421753, "total_norm": 3.6740806102752686}
13860
+ {"epoch": 19, "step": 13812, "loss": 1.4046251773834229, "total_norm": 2.9705467224121094}
13861
+ {"epoch": 19, "step": 13813, "loss": 1.5094555616378784, "total_norm": 3.3285980224609375}
13862
+ {"epoch": 19, "step": 13814, "loss": 1.1623172760009766, "total_norm": 3.7611138820648193}
13863
+ {"epoch": 19, "step": 13815, "loss": 1.2874505519866943, "total_norm": 5.154508590698242}
13864
+ {"epoch": 19, "step": 13816, "loss": 0.9000284671783447, "total_norm": 3.296130418777466}
13865
+ {"epoch": 19, "step": 13817, "loss": 1.611727237701416, "total_norm": 3.371649742126465}
13866
+ {"epoch": 19, "step": 13818, "loss": 1.2515642642974854, "total_norm": 3.831836462020874}
13867
+ {"epoch": 19, "step": 13819, "loss": 1.3288005590438843, "total_norm": 4.373511791229248}
13868
+ {"epoch": 19, "step": 13820, "loss": 1.4120903015136719, "total_norm": 3.679462432861328}
13869
+ {"epoch": 19, "step": 13821, "loss": 1.5089832544326782, "total_norm": 3.79996657371521}
13870
+ {"epoch": 19, "step": 13822, "loss": 1.1963235139846802, "total_norm": 4.381996154785156}
13871
+ {"epoch": 19, "step": 13823, "loss": 1.5002481937408447, "total_norm": 3.346191883087158}
13872
+ {"epoch": 19, "step": 13824, "loss": 1.653077483177185, "total_norm": 3.1834332942962646}
13873
+ {"epoch": 19, "step": 13825, "loss": 1.5261492729187012, "total_norm": 3.27458119392395}
13874
+ {"epoch": 19, "step": 13826, "loss": 1.4260305166244507, "total_norm": 3.7769150733947754}
13875
+ {"epoch": 19, "step": 13827, "loss": 1.3604868650436401, "total_norm": 4.325390815734863}
13876
+ {"epoch": 19, "step": 13828, "loss": 1.7318992614746094, "total_norm": 3.3800482749938965}
13877
+ {"epoch": 19, "step": 13829, "loss": 1.2067185640335083, "total_norm": 3.1083078384399414}
13878
+ {"epoch": 19, "step": 13830, "loss": 1.397711157798767, "total_norm": 2.7553489208221436}
13879
+ {"epoch": 19, "step": 13831, "loss": 1.1913471221923828, "total_norm": 4.364245891571045}
13880
+ {"epoch": 19, "step": 13832, "loss": 1.2564682960510254, "total_norm": 3.8076789379119873}
13881
+ {"epoch": 19, "step": 13833, "loss": 1.49331796169281, "total_norm": 2.9232349395751953}
13882
+ {"epoch": 19, "step": 13834, "loss": 1.492404580116272, "total_norm": 4.05551290512085}
13883
+ {"epoch": 19, "step": 13835, "loss": 1.5833320617675781, "total_norm": 3.3065950870513916}
13884
+ {"epoch": 19, "step": 13836, "loss": 1.215317726135254, "total_norm": 3.3251569271087646}
13885
+ {"epoch": 19, "step": 13837, "loss": 1.1308283805847168, "total_norm": 3.767578363418579}
13886
+ {"epoch": 19, "step": 13838, "loss": 1.5461015701293945, "total_norm": 3.5735371112823486}
13887
+ {"epoch": 19, "step": 13839, "loss": 1.337247610092163, "total_norm": 3.5992603302001953}
13888
+ {"epoch": 19, "step": 13840, "loss": 1.5686924457550049, "total_norm": 3.006532907485962}
13889
+ {"epoch": 19, "step": 13841, "loss": 1.6543413400650024, "total_norm": 3.293738842010498}
13890
+ {"epoch": 19, "step": 13842, "loss": 0.9775860905647278, "total_norm": 3.9088311195373535}
13891
+ {"epoch": 19, "step": 13843, "loss": 1.5631588697433472, "total_norm": 3.610612392425537}
13892
+ {"epoch": 19, "step": 13844, "loss": 1.4013563394546509, "total_norm": 3.6998369693756104}
13893
+ {"epoch": 19, "step": 13845, "loss": 1.3177841901779175, "total_norm": 4.250678062438965}
13894
+ {"epoch": 19, "step": 13846, "loss": 1.4865920543670654, "total_norm": 3.214715003967285}
13895
+ {"epoch": 19, "step": 13847, "loss": 1.4173619747161865, "total_norm": 3.394198417663574}
13896
+ {"epoch": 19, "step": 13848, "loss": 1.505527138710022, "total_norm": 2.9202957153320312}
13897
+ {"epoch": 19, "step": 13849, "loss": 1.3932262659072876, "total_norm": 3.1585540771484375}
13898
+ {"epoch": 19, "step": 13850, "loss": 1.4453405141830444, "total_norm": 3.9404263496398926}
13899
+ {"epoch": 19, "step": 13851, "loss": 1.4993311166763306, "total_norm": 3.565655469894409}
13900
+ {"epoch": 19, "step": 13852, "loss": 1.3209376335144043, "total_norm": 1000000000.0}
13901
+ {"epoch": 19, "step": 13853, "loss": 1.2320630550384521, "total_norm": 3.703470230102539}
13902
+ {"epoch": 19, "step": 13854, "loss": 1.0778100490570068, "total_norm": 2.7854535579681396}
13903
+ {"epoch": 19, "step": 13855, "loss": 1.1900233030319214, "total_norm": 3.5618748664855957}
13904
+ {"epoch": 19, "step": 13856, "loss": 1.418336272239685, "total_norm": 3.9247360229492188}
13905
+ {"epoch": 19, "step": 13857, "loss": 1.3962862491607666, "total_norm": 3.8892414569854736}
13906
+ {"epoch": 19, "step": 13858, "loss": 1.6071925163269043, "total_norm": 3.2979087829589844}
13907
+ {"epoch": 19, "step": 13859, "loss": 1.4364650249481201, "total_norm": 3.4121174812316895}
13908
+ {"epoch": 19, "step": 13860, "loss": 1.3365478515625, "total_norm": 3.550231695175171}
13909
+ {"epoch": 19, "step": 13861, "loss": 1.5627654790878296, "total_norm": 4.092401027679443}
13910
+ {"epoch": 19, "step": 13862, "loss": 1.1997439861297607, "total_norm": 4.474003791809082}
13911
+ {"epoch": 19, "step": 13863, "loss": 1.2754185199737549, "total_norm": 3.21000075340271}
13912
+ {"epoch": 19, "step": 13864, "loss": 1.1819127798080444, "total_norm": 3.7186572551727295}
13913
+ {"epoch": 19, "step": 13865, "loss": 1.6163060665130615, "total_norm": 3.459256649017334}
13914
+ {"epoch": 19, "step": 13866, "loss": 1.329783320426941, "total_norm": 3.1614372730255127}
13915
+ {"epoch": 19, "step": 13867, "loss": 1.5284706354141235, "total_norm": 2.969125509262085}
13916
+ {"epoch": 19, "step": 13868, "loss": 1.4010642766952515, "total_norm": 3.6467723846435547}
13917
+ {"epoch": 19, "step": 13869, "loss": 1.160421371459961, "total_norm": 3.5480117797851562}
13918
+ {"epoch": 19, "step": 13870, "loss": 1.396873950958252, "total_norm": 3.163102865219116}
13919
+ {"epoch": 19, "step": 13871, "loss": 1.3057218790054321, "total_norm": 3.4624757766723633}
13920
+ {"epoch": 19, "step": 13872, "loss": 1.1014859676361084, "total_norm": 3.410196304321289}
13921
+ {"epoch": 19, "step": 13873, "loss": 1.4116594791412354, "total_norm": 3.8112151622772217}
13922
+ {"epoch": 19, "step": 13874, "loss": 1.2767568826675415, "total_norm": 3.0549490451812744}
13923
+ {"epoch": 19, "step": 13875, "loss": 1.2590755224227905, "total_norm": 4.377433776855469}
13924
+ {"epoch": 19, "step": 13876, "loss": 1.6154398918151855, "total_norm": 3.53367280960083}
13925
+ {"epoch": 19, "step": 13877, "loss": 1.1748899221420288, "total_norm": 4.4170098304748535}
13926
+ {"epoch": 19, "step": 13878, "loss": 1.2435578107833862, "total_norm": 3.2725231647491455}
13927
+ {"epoch": 19, "step": 13879, "loss": 1.1553277969360352, "total_norm": 3.2491989135742188}
13928
+ {"epoch": 19, "step": 13880, "loss": 1.2757415771484375, "total_norm": 3.508247137069702}
13929
+ {"epoch": 19, "step": 13881, "loss": 1.442395806312561, "total_norm": 3.4950437545776367}
13930
+ {"epoch": 19, "step": 13882, "loss": 1.2750465869903564, "total_norm": 3.2605695724487305}
13931
+ {"epoch": 19, "step": 13883, "loss": 1.2569211721420288, "total_norm": 3.2201404571533203}
13932
+ {"epoch": 19, "step": 13884, "loss": 1.4164104461669922, "total_norm": 3.1164166927337646}
13933
+ {"epoch": 19, "step": 13885, "loss": 1.4475655555725098, "total_norm": 4.433736801147461}
13934
+ {"epoch": 19, "step": 13886, "loss": 1.4798554182052612, "total_norm": 3.863006114959717}
13935
+ {"epoch": 19, "step": 13887, "loss": 0.9034466743469238, "total_norm": 3.5856900215148926}
13936
+ {"epoch": 19, "step": 13888, "loss": 1.360809564590454, "total_norm": 3.3562755584716797}
13937
+ {"epoch": 19, "step": 13889, "loss": 1.331078290939331, "total_norm": 3.5046794414520264}
13938
+ {"epoch": 19, "step": 13890, "loss": 1.6517996788024902, "total_norm": 3.5902092456817627}
13939
+ {"epoch": 19, "step": 13891, "loss": 1.4103026390075684, "total_norm": 3.936246395111084}
13940
+ {"epoch": 19, "step": 13892, "loss": 1.5611121654510498, "total_norm": 3.365882158279419}
13941
+ {"epoch": 19, "step": 13893, "loss": 1.3979692459106445, "total_norm": 3.8889269828796387}
13942
+ {"epoch": 19, "step": 13894, "loss": 1.3645174503326416, "total_norm": 3.578873634338379}
13943
+ {"epoch": 19, "step": 13895, "loss": 1.6218217611312866, "total_norm": 2.810274362564087}
13944
+ {"epoch": 19, "step": 13896, "loss": 1.1583831310272217, "total_norm": 3.7611944675445557}
13945
+ {"epoch": 19, "step": 13897, "loss": 1.394298791885376, "total_norm": 3.300049066543579}
13946
+ {"epoch": 19, "step": 13898, "loss": 1.250000238418579, "total_norm": 3.102980852127075}
13947
+ {"epoch": 19, "step": 13899, "loss": 1.0919123888015747, "total_norm": 3.142390251159668}
13948
+ {"epoch": 19, "step": 13900, "loss": 1.422178030014038, "total_norm": 5.304150581359863}
13949
+ {"epoch": 19, "step": 13901, "loss": 1.2336692810058594, "total_norm": 3.028583526611328}
13950
+ {"epoch": 19, "step": 13902, "loss": 1.3803560733795166, "total_norm": 3.5557689666748047}
13951
+ {"epoch": 19, "step": 13903, "loss": 1.2183988094329834, "total_norm": 3.9156503677368164}
13952
+ {"epoch": 19, "step": 13904, "loss": 0.8802387118339539, "total_norm": 3.606165647506714}
13953
+ {"epoch": 19, "step": 13905, "loss": 1.1896981000900269, "total_norm": 4.5046868324279785}
13954
+ {"epoch": 19, "step": 13906, "loss": 0.9525175094604492, "total_norm": 3.6612393856048584}
13955
+ {"epoch": 19, "step": 13907, "loss": 1.612856388092041, "total_norm": 3.0883309841156006}
13956
+ {"epoch": 19, "step": 13908, "loss": 1.513391375541687, "total_norm": 3.729346752166748}
13957
+ {"epoch": 19, "step": 13909, "loss": 1.148653268814087, "total_norm": 4.641780853271484}
13958
+ {"epoch": 19, "step": 13910, "loss": 1.6543266773223877, "total_norm": 2.7928805351257324}
13959
+ {"epoch": 19, "step": 13911, "loss": 1.5386483669281006, "total_norm": 4.298567295074463}
13960
+ {"epoch": 19, "step": 13912, "loss": 1.4389814138412476, "total_norm": 3.8287341594696045}
13961
+ {"epoch": 19, "step": 13913, "loss": 1.2729343175888062, "total_norm": 3.062652349472046}
13962
+ {"epoch": 19, "step": 13914, "loss": 1.0612868070602417, "total_norm": 3.249479293823242}
13963
+ {"epoch": 19, "step": 13915, "loss": 1.2633904218673706, "total_norm": 3.9912526607513428}
13964
+ {"epoch": 19, "step": 13916, "loss": 1.4231977462768555, "total_norm": 2.9683401584625244}
13965
+ {"epoch": 19, "step": 13917, "loss": 1.378708004951477, "total_norm": 2.927055597305298}
13966
+ {"epoch": 19, "step": 13918, "loss": 1.5940159559249878, "total_norm": 3.5961508750915527}
13967
+ {"epoch": 19, "step": 13919, "loss": 1.130767583847046, "total_norm": 2.4697446823120117}
13968
+ {"epoch": 19, "step": 13920, "loss": 1.3683284521102905, "total_norm": 3.448784351348877}
13969
+ {"epoch": 19, "step": 13921, "loss": 1.3640484809875488, "total_norm": 3.916837453842163}
13970
+ {"epoch": 19, "step": 13922, "loss": 1.3909060955047607, "total_norm": 5.089125156402588}
13971
+ {"epoch": 19, "step": 13923, "loss": 1.5455248355865479, "total_norm": 3.2558887004852295}
13972
+ {"epoch": 19, "step": 13924, "loss": 1.4394009113311768, "total_norm": 3.852135181427002}
13973
+ {"epoch": 19, "step": 13925, "loss": 1.275141954421997, "total_norm": 3.908691167831421}
13974
+ {"epoch": 19, "step": 13926, "loss": 1.152681827545166, "total_norm": 4.055678844451904}
13975
+ {"epoch": 19, "step": 13927, "loss": 1.3081291913986206, "total_norm": 3.0230889320373535}
13976
+ {"epoch": 19, "step": 13928, "loss": 1.2460763454437256, "total_norm": 3.732677698135376}
13977
+ {"epoch": 19, "step": 13929, "loss": 1.176497220993042, "total_norm": 4.429805278778076}
13978
+ {"epoch": 19, "step": 13930, "loss": 0.8283953666687012, "total_norm": 3.0156283378601074}
13979
+ {"epoch": 19, "step": 13931, "loss": 1.48934006690979, "total_norm": 3.755343437194824}
13980
+ {"epoch": 19, "step": 13932, "loss": 1.5311486721038818, "total_norm": 3.4605069160461426}
13981
+ {"epoch": 19, "step": 13933, "loss": 1.5039645433425903, "total_norm": 3.0157032012939453}
13982
+ {"epoch": 19, "step": 13934, "loss": 1.0752419233322144, "total_norm": 3.8941190242767334}
13983
+ {"epoch": 19, "step": 13935, "loss": 1.2608106136322021, "total_norm": 3.491238832473755}
13984
+ {"epoch": 19, "step": 13936, "loss": 1.1125692129135132, "total_norm": 3.603640556335449}
13985
+ {"epoch": 19, "step": 13937, "loss": 1.3463929891586304, "total_norm": 3.6090757846832275}
13986
+ {"epoch": 19, "step": 13938, "loss": 0.5188153982162476, "total_norm": 2.4292819499969482}
13987
+ {"epoch": 19, "step": 13939, "loss": 1.3678768873214722, "total_norm": 3.9492199420928955}
13988
+ {"epoch": 19, "step": 13940, "loss": 1.2560675144195557, "total_norm": 3.166985511779785}
13989
+ {"epoch": 19, "step": 13941, "loss": 1.4024527072906494, "total_norm": 4.027840614318848}
13990
+ {"epoch": 19, "step": 13942, "loss": 1.4701975584030151, "total_norm": 3.8379533290863037}
13991
+ {"epoch": 19, "step": 13943, "loss": 1.4902639389038086, "total_norm": 3.846371650695801}
13992
+ {"epoch": 19, "step": 13944, "loss": 1.263871431350708, "total_norm": 3.697085380554199}
13993
+ {"epoch": 19, "step": 13945, "loss": 1.3626142740249634, "total_norm": 3.6297314167022705}
13994
+ {"epoch": 19, "step": 13946, "loss": 1.4741934537887573, "total_norm": 2.8774778842926025}
13995
+ {"epoch": 19, "step": 13947, "loss": 1.2115824222564697, "total_norm": 3.2660515308380127}
13996
+ {"epoch": 19, "step": 13948, "loss": 1.2000272274017334, "total_norm": 4.039942741394043}
13997
+ {"epoch": 19, "step": 13949, "loss": 1.3280689716339111, "total_norm": 3.496654510498047}
13998
+ {"epoch": 19, "step": 13950, "loss": 1.2826392650604248, "total_norm": 4.674089431762695}
13999
+ {"epoch": 19, "step": 13951, "loss": 1.4711885452270508, "total_norm": 4.6338019371032715}
14000
+ {"epoch": 19, "step": 13952, "loss": 1.4359925985336304, "total_norm": 2.9124433994293213}
14001
+ {"epoch": 19, "step": 13953, "loss": 1.4109938144683838, "total_norm": 3.2085156440734863}
14002
+ {"epoch": 19, "step": 13954, "loss": 1.5499255657196045, "total_norm": 3.756131410598755}
14003
+ {"epoch": 19, "step": 13955, "loss": 1.408119797706604, "total_norm": 3.998027801513672}
14004
+ {"epoch": 19, "step": 13956, "loss": 1.0749186277389526, "total_norm": 4.241844177246094}
14005
+ {"epoch": 19, "step": 13957, "loss": 1.3911609649658203, "total_norm": 3.1060664653778076}
14006
+ {"epoch": 19, "step": 13958, "loss": 1.3904567956924438, "total_norm": 3.048487424850464}
14007
+ {"epoch": 19, "step": 13959, "loss": 1.0279676914215088, "total_norm": 3.652594804763794}
14008
+ {"epoch": 19, "step": 13960, "loss": 1.3084033727645874, "total_norm": 3.617670774459839}
14009
+ {"epoch": 19, "step": 13961, "loss": 1.2361332178115845, "total_norm": 4.130348205566406}
14010
+ {"epoch": 19, "step": 13962, "loss": 1.562913417816162, "total_norm": 3.1697187423706055}
14011
+ {"epoch": 19, "step": 13963, "loss": 1.3418010473251343, "total_norm": 4.553969383239746}
14012
+ {"epoch": 19, "step": 13964, "loss": 1.5367666482925415, "total_norm": 2.6793220043182373}
14013
+ {"epoch": 19, "step": 13965, "loss": 0.8817513585090637, "total_norm": 2.1981201171875}
14014
+ {"epoch": 19, "step": 13966, "loss": 1.1535142660140991, "total_norm": 3.142029047012329}
14015
+ {"epoch": 19, "step": 13967, "loss": 1.560259222984314, "total_norm": 4.089107036590576}
14016
+ {"epoch": 19, "step": 13968, "loss": 1.2514504194259644, "total_norm": 2.9912619590759277}
14017
+ {"epoch": 19, "step": 13969, "loss": 1.2876653671264648, "total_norm": 3.5371830463409424}
14018
+ {"epoch": 19, "step": 13970, "loss": 1.4682369232177734, "total_norm": 3.911827564239502}
14019
+ {"epoch": 19, "step": 13971, "loss": 1.3976976871490479, "total_norm": 3.123839855194092}
14020
+ {"epoch": 19, "step": 13972, "loss": 1.0377702713012695, "total_norm": 4.058869361877441}
14021
+ {"epoch": 19, "step": 13973, "loss": 1.1659239530563354, "total_norm": 3.504319667816162}
14022
+ {"epoch": 19, "step": 13974, "loss": 1.5559988021850586, "total_norm": 3.0134479999542236}
14023
+ {"epoch": 19, "step": 13975, "loss": 1.4283263683319092, "total_norm": 3.764871835708618}
14024
+ {"epoch": 19, "step": 13976, "loss": 1.4629898071289062, "total_norm": 3.619323968887329}
14025
+ {"epoch": 19, "step": 13977, "loss": 1.0305536985397339, "total_norm": 4.2964348793029785}
14026
+ {"epoch": 19, "step": 13978, "loss": 1.3385525941848755, "total_norm": 3.8650126457214355}
14027
+ {"epoch": 19, "step": 13979, "loss": 1.2648903131484985, "total_norm": 3.7171952724456787}
14028
+ {"epoch": 19, "step": 13980, "loss": 1.2762843370437622, "total_norm": 2.614628314971924}
14029
+ {"epoch": 19, "step": 13981, "loss": 1.4799822568893433, "total_norm": 3.762803792953491}
14030
+ {"epoch": 19, "step": 13982, "loss": 1.0561119318008423, "total_norm": 3.432504177093506}
14031
+ {"epoch": 19, "step": 13983, "loss": 1.4376717805862427, "total_norm": 3.2232375144958496}
14032
+ {"epoch": 19, "step": 13984, "loss": 1.8607110977172852, "total_norm": 3.2526493072509766}
14033
+ {"epoch": 19, "step": 13985, "loss": 1.3441150188446045, "total_norm": 3.713226079940796}
14034
+ {"epoch": 19, "step": 13986, "loss": 1.0350557565689087, "total_norm": 3.399580955505371}
14035
+ {"epoch": 19, "step": 13987, "loss": 1.1008496284484863, "total_norm": 3.991511583328247}
14036
+ {"epoch": 19, "step": 13988, "loss": 1.4017677307128906, "total_norm": 3.5343847274780273}
14037
+ {"epoch": 19, "step": 13989, "loss": 1.190545916557312, "total_norm": 3.267096519470215}
14038
+ {"epoch": 19, "step": 13990, "loss": 1.315282940864563, "total_norm": 4.360154151916504}
14039
+ {"epoch": 19, "step": 13991, "loss": 1.317928433418274, "total_norm": 3.1845855712890625}
14040
+ {"epoch": 19, "step": 13992, "loss": 1.182515025138855, "total_norm": 3.040846347808838}
14041
+ {"epoch": 19, "step": 13993, "loss": 1.5769376754760742, "total_norm": 4.659018039703369}
14042
+ {"epoch": 19, "step": 13994, "loss": 1.3778996467590332, "total_norm": 2.988072395324707}
14043
+ {"epoch": 19, "step": 13995, "loss": 1.3817700147628784, "total_norm": 4.417616367340088}
14044
+ {"epoch": 19, "step": 13996, "loss": 1.2910587787628174, "total_norm": 3.058441400527954}
14045
+ {"epoch": 19, "step": 13997, "loss": 1.1234253644943237, "total_norm": 4.422340393066406}
14046
+ {"epoch": 19, "step": 13998, "loss": 1.1198488473892212, "total_norm": 2.87886118888855}
14047
+ {"epoch": 19, "step": 13999, "loss": 1.6561068296432495, "total_norm": 3.2926084995269775}
14048
+ {"epoch": 19, "step": 14000, "eval_loss": 3.1131076514720917, "eval_rougeL": 0.11258248826184435}
14049
+ {"epoch": 19, "step": 14000, "loss": 1.4486746788024902, "total_norm": 3.31862473487854}
14050
+ {"epoch": 19, "step": 14001, "loss": 1.4579111337661743, "total_norm": 3.1650030612945557}
14051
+ {"epoch": 19, "step": 14002, "loss": 1.0276882648468018, "total_norm": 4.229379177093506}
14052
+ {"epoch": 19, "step": 14003, "loss": 1.15628182888031, "total_norm": 3.475034713745117}
14053
+ {"epoch": 19, "step": 14004, "loss": 1.37619948387146, "total_norm": 4.4361162185668945}
14054
+ {"epoch": 19, "step": 14005, "loss": 1.189640998840332, "total_norm": 4.292090892791748}
14055
+ {"epoch": 19, "step": 14006, "loss": 1.442291498184204, "total_norm": 3.1540520191192627}
14056
+ {"epoch": 19, "step": 14007, "loss": 1.3953479528427124, "total_norm": 2.7646453380584717}
14057
+ {"epoch": 19, "step": 14008, "loss": 1.2389835119247437, "total_norm": 3.307893753051758}
14058
+ {"epoch": 19, "step": 14009, "loss": 1.1778323650360107, "total_norm": 4.514660358428955}
14059
+ {"epoch": 19, "step": 14010, "loss": 1.20244300365448, "total_norm": 4.051914215087891}
14060
+ {"epoch": 19, "step": 14011, "loss": 1.4827698469161987, "total_norm": 3.4567668437957764}
14061
+ {"epoch": 19, "step": 14012, "loss": 1.2588380575180054, "total_norm": 3.5959253311157227}
14062
+ {"epoch": 19, "step": 14013, "loss": 1.258537769317627, "total_norm": 4.171225547790527}
14063
+ {"epoch": 19, "step": 14014, "loss": 1.2218974828720093, "total_norm": 3.623368740081787}
14064
+ {"epoch": 19, "step": 14015, "loss": 1.1256859302520752, "total_norm": 4.2674150466918945}
14065
+ {"epoch": 19, "step": 14016, "loss": 1.5315922498703003, "total_norm": 2.9174654483795166}
14066
+ {"epoch": 19, "step": 14017, "loss": 1.5094130039215088, "total_norm": 2.9342193603515625}
14067
+ {"epoch": 19, "step": 14018, "loss": 1.376240849494934, "total_norm": 4.668813228607178}
14068
+ {"epoch": 19, "step": 14019, "loss": 1.4617420434951782, "total_norm": 3.0133440494537354}
14069
+ {"epoch": 19, "step": 14020, "loss": 1.6758153438568115, "total_norm": 3.5599782466888428}
14070
+ {"epoch": 19, "step": 14021, "loss": 1.3301680088043213, "total_norm": 3.748786211013794}
14071
+ {"epoch": 19, "step": 14022, "loss": 1.1876527070999146, "total_norm": 3.7101492881774902}
14072
+ {"epoch": 19, "step": 14023, "loss": 1.3718043565750122, "total_norm": 2.9470324516296387}
14073
+ {"epoch": 19, "step": 14024, "loss": 1.3369330167770386, "total_norm": 3.273040533065796}
14074
+ {"epoch": 19, "step": 14025, "loss": 1.707478404045105, "total_norm": 2.987119197845459}
14075
+ {"epoch": 19, "step": 14026, "loss": 1.3158237934112549, "total_norm": 3.013349771499634}
14076
+ {"epoch": 19, "step": 14027, "loss": 1.8988077640533447, "total_norm": 3.7954092025756836}
14077
+ {"epoch": 19, "step": 14028, "loss": 1.227414846420288, "total_norm": 3.5639753341674805}
14078
+ {"epoch": 19, "step": 14029, "loss": 1.4326450824737549, "total_norm": 3.378645658493042}
14079
+ {"epoch": 19, "step": 14030, "loss": 1.3568103313446045, "total_norm": 3.0825252532958984}
14080
+ {"epoch": 19, "step": 14031, "loss": 1.6860133409500122, "total_norm": 3.3246707916259766}
14081
+ {"epoch": 19, "step": 14032, "loss": 1.1100249290466309, "total_norm": 3.2908923625946045}
14082
+ {"epoch": 19, "step": 14033, "loss": 1.4681649208068848, "total_norm": 3.593567132949829}
14083
+ {"epoch": 19, "step": 14034, "loss": 0.6500354409217834, "total_norm": 5.056446552276611}
14084
+ {"epoch": 19, "step": 14035, "loss": 1.4157967567443848, "total_norm": 3.5897274017333984}
14085
+ {"epoch": 19, "step": 14036, "loss": 1.0930641889572144, "total_norm": 3.1707873344421387}
14086
+ {"epoch": 19, "step": 14037, "loss": 1.404051423072815, "total_norm": 3.766655683517456}
14087
+ {"epoch": 19, "step": 14038, "loss": 1.2189825773239136, "total_norm": 3.5552704334259033}
14088
+ {"epoch": 19, "step": 14039, "loss": 1.343224287033081, "total_norm": 3.890875816345215}
14089
+ {"epoch": 19, "step": 14040, "loss": 1.1207574605941772, "total_norm": 3.525996208190918}
14090
+ {"epoch": 19, "step": 14041, "loss": 1.4440884590148926, "total_norm": 3.7399864196777344}
14091
+ {"epoch": 19, "step": 14042, "loss": 1.481415033340454, "total_norm": 3.026045560836792}
14092
+ {"epoch": 19, "step": 14043, "loss": 1.0720912218093872, "total_norm": 2.9777517318725586}
14093
+ {"epoch": 19, "step": 14044, "loss": 0.9973153471946716, "total_norm": 3.0150887966156006}
14094
+ {"epoch": 19, "step": 14045, "loss": 1.2502543926239014, "total_norm": 3.888038158416748}
14095
+ {"epoch": 19, "step": 14046, "loss": 1.330197811126709, "total_norm": 3.6867103576660156}
14096
+ {"epoch": 19, "step": 14047, "loss": 1.3834658861160278, "total_norm": 4.377874851226807}
14097
+ {"epoch": 19, "step": 14048, "loss": 1.217374324798584, "total_norm": 4.267746448516846}
14098
+ {"epoch": 19, "step": 14049, "loss": 1.516808271408081, "total_norm": 3.6609890460968018}
14099
+ {"epoch": 19, "step": 14050, "loss": 1.31804621219635, "total_norm": 3.280810594558716}
14100
+ {"epoch": 19, "step": 14051, "loss": 1.7038323879241943, "total_norm": 3.18331241607666}
14101
+ {"epoch": 19, "step": 14052, "loss": 1.6530207395553589, "total_norm": 4.117761611938477}
14102
+ {"epoch": 19, "step": 14053, "loss": 1.411211371421814, "total_norm": 3.4053266048431396}
14103
+ {"epoch": 19, "step": 14054, "loss": 1.5339051485061646, "total_norm": 3.5721542835235596}
14104
+ {"epoch": 19, "step": 14055, "loss": 1.3184365034103394, "total_norm": 3.9364192485809326}
14105
+ {"epoch": 19, "step": 14056, "loss": 1.3035435676574707, "total_norm": 5.077311038970947}
14106
+ {"epoch": 19, "step": 14057, "loss": 1.1203173398971558, "total_norm": 4.2697906494140625}
14107
+ {"epoch": 19, "step": 14058, "loss": 1.3390730619430542, "total_norm": 3.426119804382324}
14108
+ {"epoch": 19, "step": 14059, "loss": 1.126966953277588, "total_norm": 3.7569832801818848}
14109
+ {"epoch": 19, "step": 14060, "loss": 0.9648793935775757, "total_norm": 3.0948643684387207}
14110
+ {"epoch": 19, "step": 14061, "loss": 1.481185793876648, "total_norm": 3.748098611831665}
14111
+ {"epoch": 19, "step": 14062, "loss": 1.561226725578308, "total_norm": 3.930525541305542}
14112
+ {"epoch": 19, "step": 14063, "loss": 1.4304214715957642, "total_norm": 3.2301955223083496}
14113
+ {"epoch": 19, "step": 14064, "loss": 1.019722580909729, "total_norm": 3.737152099609375}
14114
+ {"epoch": 19, "step": 14065, "loss": 1.6184202432632446, "total_norm": 3.5142557621002197}
14115
+ {"epoch": 19, "step": 14066, "loss": 1.8195992708206177, "total_norm": 3.0546388626098633}
14116
+ {"epoch": 19, "step": 14067, "loss": 1.6313087940216064, "total_norm": 3.365312337875366}
14117
+ {"epoch": 19, "step": 14068, "loss": 1.165440559387207, "total_norm": 4.016746520996094}
14118
+ {"epoch": 19, "step": 14069, "loss": 1.1371691226959229, "total_norm": 3.215895891189575}
14119
+ {"epoch": 19, "step": 14070, "loss": 1.4943372011184692, "total_norm": 3.6633715629577637}
14120
+ {"epoch": 19, "step": 14071, "loss": 1.4997023344039917, "total_norm": 3.8102355003356934}
14121
+ {"epoch": 19, "step": 14072, "loss": 1.434981107711792, "total_norm": 3.742481231689453}
14122
+ {"epoch": 19, "step": 14073, "loss": 1.5176267623901367, "total_norm": 3.262881278991699}
14123
+ {"epoch": 19, "step": 14074, "loss": 1.5923515558242798, "total_norm": 3.5909736156463623}
14124
+ {"epoch": 19, "step": 14075, "loss": 1.6387115716934204, "total_norm": 3.188292980194092}
14125
+ {"epoch": 19, "step": 14076, "loss": 1.5126436948776245, "total_norm": 3.640908718109131}
14126
+ {"epoch": 19, "step": 14077, "loss": 1.2023791074752808, "total_norm": 3.291428327560425}
14127
+ {"epoch": 19, "step": 14078, "loss": 1.3068734407424927, "total_norm": 3.550781488418579}
14128
+ {"epoch": 19, "step": 14079, "loss": 0.8522324562072754, "total_norm": 4.30126428604126}
14129
+ {"epoch": 19, "step": 14080, "loss": 1.1942012310028076, "total_norm": 4.933928489685059}
14130
+ {"epoch": 19, "step": 14081, "loss": 1.360261082649231, "total_norm": 3.219931125640869}
14131
+ {"epoch": 19, "step": 14082, "loss": 1.32433021068573, "total_norm": 3.2378222942352295}
14132
+ {"epoch": 19, "step": 14083, "loss": 1.6470221281051636, "total_norm": 3.1097030639648438}
14133
+ {"epoch": 19, "step": 14084, "loss": 1.4581066370010376, "total_norm": 2.847804546356201}
14134
+ {"epoch": 19, "step": 14085, "loss": 1.4894130229949951, "total_norm": 3.8125722408294678}
14135
+ {"epoch": 19, "step": 14086, "loss": 1.222298264503479, "total_norm": 3.5746688842773438}
14136
+ {"epoch": 19, "step": 14087, "loss": 1.283980369567871, "total_norm": 2.960320234298706}
14137
+ {"epoch": 19, "step": 14088, "loss": 1.3748301267623901, "total_norm": 3.949843168258667}
14138
+ {"epoch": 19, "step": 14089, "loss": 1.6329991817474365, "total_norm": 3.9777119159698486}
14139
+ {"epoch": 19, "step": 14090, "loss": 1.2210317850112915, "total_norm": 4.349681854248047}
14140
+ {"epoch": 19, "step": 14091, "loss": 1.1484496593475342, "total_norm": 3.670325756072998}
14141
+ {"epoch": 19, "step": 14092, "loss": 1.4503482580184937, "total_norm": 3.955756425857544}
14142
+ {"epoch": 19, "step": 14093, "loss": 1.1896182298660278, "total_norm": 3.713452100753784}
14143
+ {"epoch": 19, "step": 14094, "loss": 1.204245686531067, "total_norm": 3.986173629760742}
14144
+ {"epoch": 19, "step": 14095, "loss": 1.2290568351745605, "total_norm": 3.213573455810547}
14145
+ {"epoch": 19, "step": 14096, "loss": 1.6256989240646362, "total_norm": 4.280498504638672}
14146
+ {"epoch": 19, "step": 14097, "loss": 1.2025058269500732, "total_norm": 3.6078336238861084}
14147
+ {"epoch": 19, "step": 14098, "loss": 0.6266629695892334, "total_norm": 3.5308022499084473}
14148
+ {"epoch": 19, "step": 14099, "loss": 1.3503965139389038, "total_norm": 3.235090970993042}
14149
+ {"epoch": 19, "step": 14100, "loss": 1.3603423833847046, "total_norm": 3.2848446369171143}
14150
+ {"epoch": 19, "step": 14101, "loss": 1.6433576345443726, "total_norm": 3.2997303009033203}
14151
+ {"epoch": 19, "step": 14102, "loss": 1.2110967636108398, "total_norm": 3.525378942489624}
14152
+ {"epoch": 19, "step": 14103, "loss": 1.3460135459899902, "total_norm": 4.0301666259765625}
14153
+ {"epoch": 19, "step": 14104, "loss": 1.6904802322387695, "total_norm": 3.26029634475708}
14154
+ {"epoch": 19, "step": 14105, "loss": 1.094543218612671, "total_norm": 3.930649995803833}
14155
+ {"epoch": 19, "step": 14106, "loss": 1.4158648252487183, "total_norm": 3.3469033241271973}
14156
+ {"epoch": 19, "step": 14107, "loss": 1.3062901496887207, "total_norm": 2.5403378009796143}
14157
+ {"epoch": 19, "step": 14108, "loss": 1.3205429315567017, "total_norm": 4.711584568023682}
14158
+ {"epoch": 19, "step": 14109, "loss": 1.5160243511199951, "total_norm": 3.227591037750244}
14159
+ {"epoch": 19, "step": 14110, "loss": 0.9527182579040527, "total_norm": 4.108139514923096}
14160
+ {"epoch": 19, "step": 14111, "loss": 1.4007989168167114, "total_norm": 3.1189332008361816}
14161
+ {"epoch": 19, "step": 14112, "loss": 1.5058281421661377, "total_norm": 3.545809507369995}
14162
+ {"epoch": 19, "step": 14113, "loss": 1.3436439037322998, "total_norm": 3.2531440258026123}
14163
+ {"epoch": 19, "step": 14114, "loss": 1.421860694885254, "total_norm": 3.2542011737823486}
14164
+ {"epoch": 19, "step": 14115, "loss": 1.5227891206741333, "total_norm": 3.6724774837493896}
14165
+ {"epoch": 19, "step": 14116, "loss": 1.443095088005066, "total_norm": 2.737286329269409}
14166
+ {"epoch": 19, "step": 14117, "loss": 1.1955620050430298, "total_norm": 4.0276103019714355}
14167
+ {"epoch": 19, "step": 14118, "loss": 0.9603585600852966, "total_norm": 4.289978981018066}
14168
+ {"epoch": 19, "step": 14119, "loss": 1.5837066173553467, "total_norm": 3.611142158508301}
14169
+ {"epoch": 19, "step": 14120, "loss": 1.3342534303665161, "total_norm": 3.0907530784606934}
14170
+ {"epoch": 19, "step": 14121, "loss": 1.2937818765640259, "total_norm": 4.528342247009277}
14171
+ {"epoch": 19, "step": 14122, "loss": 1.0420963764190674, "total_norm": 3.277010917663574}
14172
+ {"epoch": 19, "step": 14123, "loss": 1.0199909210205078, "total_norm": 3.166001319885254}
14173
+ {"epoch": 19, "step": 14124, "loss": 1.1608738899230957, "total_norm": 3.187167167663574}
14174
+ {"epoch": 19, "step": 14125, "loss": 1.4473011493682861, "total_norm": 4.725062370300293}
14175
+ {"epoch": 19, "step": 14126, "loss": 1.2178456783294678, "total_norm": 3.41794753074646}
14176
+ {"epoch": 19, "step": 14127, "loss": 1.3315527439117432, "total_norm": 3.9985413551330566}
14177
+ {"epoch": 19, "step": 14128, "loss": 1.3368330001831055, "total_norm": 4.514730930328369}
14178
+ {"epoch": 19, "step": 14129, "loss": 1.4257457256317139, "total_norm": 4.3133225440979}
14179
+ {"epoch": 19, "step": 14130, "loss": 1.3046735525131226, "total_norm": 3.7147576808929443}
14180
+ {"epoch": 19, "step": 14131, "loss": 1.4335259199142456, "total_norm": 3.448401927947998}
14181
+ {"epoch": 19, "step": 14132, "loss": 1.4065020084381104, "total_norm": 3.7620787620544434}
14182
+ {"epoch": 19, "step": 14133, "loss": 1.1066087484359741, "total_norm": 4.138252258300781}
14183
+ {"epoch": 19, "step": 14134, "loss": 1.2815513610839844, "total_norm": 3.503453016281128}
14184
+ {"epoch": 19, "step": 14135, "loss": 1.2718195915222168, "total_norm": 3.289595365524292}
14185
+ {"epoch": 19, "step": 14136, "loss": 1.1435898542404175, "total_norm": 4.217740535736084}
14186
+ {"epoch": 19, "step": 14137, "loss": 1.2311439514160156, "total_norm": 3.6355016231536865}
14187
+ {"epoch": 19, "step": 14138, "loss": 1.4844903945922852, "total_norm": 3.4670755863189697}
14188
+ {"epoch": 19, "step": 14139, "loss": 1.445171594619751, "total_norm": 3.7543439865112305}
14189
+ {"epoch": 19, "step": 14140, "loss": 1.2595081329345703, "total_norm": 2.84796142578125}
14190
+ {"epoch": 19, "step": 14141, "loss": 1.4658671617507935, "total_norm": 3.3623595237731934}
14191
+ {"epoch": 19, "step": 14142, "loss": 1.038564920425415, "total_norm": 3.6918647289276123}
14192
+ {"epoch": 19, "step": 14143, "loss": 1.445712924003601, "total_norm": 3.306586980819702}
14193
+ {"epoch": 19, "step": 14144, "loss": 1.213793396949768, "total_norm": 3.574176549911499}
14194
+ {"epoch": 19, "step": 14145, "loss": 1.2481062412261963, "total_norm": 4.121716022491455}
14195
+ {"epoch": 19, "step": 14146, "loss": 1.6451507806777954, "total_norm": 3.5474770069122314}
14196
+ {"epoch": 19, "step": 14147, "loss": 1.5447973012924194, "total_norm": 3.393073558807373}
14197
+ {"epoch": 19, "step": 14148, "loss": 1.1812303066253662, "total_norm": 4.193353652954102}
14198
+ {"epoch": 19, "step": 14149, "loss": 1.2025116682052612, "total_norm": 3.486402988433838}
14199
+ {"epoch": 19, "step": 14150, "loss": 1.5411745309829712, "total_norm": 3.257636070251465}
14200
+ {"epoch": 19, "step": 14151, "loss": 1.2925987243652344, "total_norm": 3.4548745155334473}
14201
+ {"epoch": 19, "step": 14152, "loss": 1.088512659072876, "total_norm": 2.978032112121582}
14202
+ {"epoch": 19, "step": 14153, "loss": 1.4314992427825928, "total_norm": 4.2370429039001465}
14203
+ {"epoch": 19, "step": 14154, "loss": 1.3493762016296387, "total_norm": 3.1702427864074707}
14204
+ {"epoch": 19, "step": 14155, "loss": 1.3379883766174316, "total_norm": 3.7126057147979736}
14205
+ {"epoch": 19, "step": 14156, "loss": 1.323988437652588, "total_norm": 3.1862757205963135}
14206
+ {"epoch": 19, "step": 14157, "loss": 1.6125900745391846, "total_norm": 2.718179941177368}
14207
+ {"epoch": 19, "step": 14158, "loss": 1.4303442239761353, "total_norm": 4.540006637573242}
14208
+ {"epoch": 19, "step": 14159, "loss": 1.5292736291885376, "total_norm": 4.2932448387146}
14209
+ {"epoch": 19, "step": 14160, "loss": 1.5301117897033691, "total_norm": 3.784507989883423}
14210
+ {"epoch": 19, "step": 14161, "loss": 1.6105931997299194, "total_norm": 3.3341615200042725}
14211
+ {"epoch": 19, "step": 14162, "loss": 0.9185343384742737, "total_norm": 3.9440557956695557}
14212
+ {"epoch": 19, "step": 14163, "loss": 1.3019683361053467, "total_norm": 3.6302173137664795}
14213
+ {"epoch": 19, "step": 14164, "loss": 1.0027467012405396, "total_norm": 3.3890762329101562}
14214
+ {"epoch": 19, "step": 14165, "loss": 1.2155451774597168, "total_norm": 5.132282257080078}
14215
+ {"epoch": 19, "step": 14166, "loss": 1.421496868133545, "total_norm": 3.204800844192505}
14216
+ {"epoch": 19, "step": 14167, "loss": 1.468731164932251, "total_norm": 4.450320720672607}
14217
+ {"epoch": 19, "step": 14168, "loss": 1.317943811416626, "total_norm": 2.7068021297454834}
14218
+ {"epoch": 19, "step": 14169, "loss": 1.3533121347427368, "total_norm": 3.0871012210845947}
14219
+ {"epoch": 19, "step": 14170, "loss": 1.5318094491958618, "total_norm": 3.6953353881835938}
14220
+ {"epoch": 19, "step": 14171, "loss": 0.9843353033065796, "total_norm": 3.039132833480835}
14221
+ {"epoch": 19, "step": 14172, "loss": 0.9806560277938843, "total_norm": 3.3177735805511475}
14222
+ {"epoch": 19, "step": 14173, "loss": 1.5736825466156006, "total_norm": 3.500688314437866}
14223
+ {"epoch": 19, "step": 14174, "loss": 1.2323366403579712, "total_norm": 4.023686408996582}
14224
+ {"epoch": 19, "step": 14175, "loss": 1.529475212097168, "total_norm": 3.796372413635254}
14225
+ {"epoch": 19, "step": 14176, "loss": 1.4680604934692383, "total_norm": 3.0900046825408936}
14226
+ {"epoch": 19, "step": 14177, "loss": 1.5935028791427612, "total_norm": 3.540900945663452}
14227
+ {"epoch": 19, "step": 14178, "loss": 1.2685668468475342, "total_norm": 3.113795518875122}
14228
+ {"epoch": 19, "step": 14179, "loss": 1.4529211521148682, "total_norm": 4.309112548828125}
14229
+ {"epoch": 19, "step": 14180, "loss": 1.1902700662612915, "total_norm": 2.6482505798339844}
14230
+ {"epoch": 19, "step": 14181, "loss": 1.4291486740112305, "total_norm": 2.9848499298095703}
14231
+ {"epoch": 19, "step": 14182, "loss": 1.1672406196594238, "total_norm": 3.133439302444458}
14232
+ {"epoch": 19, "step": 14183, "loss": 1.249977469444275, "total_norm": 3.3811304569244385}
14233
+ {"epoch": 19, "step": 14184, "loss": 0.9177681803703308, "total_norm": 2.9643146991729736}
14234
+ {"epoch": 19, "step": 14185, "loss": 1.5210496187210083, "total_norm": 4.334852695465088}
14235
+ {"epoch": 19, "step": 14186, "loss": 0.9007498621940613, "total_norm": 3.3142757415771484}
14236
+ {"epoch": 19, "step": 14187, "loss": 1.0281555652618408, "total_norm": 3.8977625370025635}
14237
+ {"epoch": 19, "step": 14188, "loss": 1.3552738428115845, "total_norm": 2.9916906356811523}
14238
+ {"epoch": 19, "step": 14189, "loss": 1.4616773128509521, "total_norm": 3.547591209411621}
14239
+ {"epoch": 19, "step": 14190, "loss": 1.244511604309082, "total_norm": 3.9999277591705322}
14240
+ {"epoch": 19, "step": 14191, "loss": 1.1122323274612427, "total_norm": 3.537202835083008}
14241
+ {"epoch": 19, "step": 14192, "loss": 1.5292298793792725, "total_norm": 3.093743324279785}
14242
+ {"epoch": 19, "step": 14193, "loss": 1.1827176809310913, "total_norm": 3.543661594390869}
14243
+ {"epoch": 19, "step": 14194, "loss": 1.5575578212738037, "total_norm": 3.706979513168335}
14244
+ {"epoch": 19, "step": 14195, "loss": 1.4065159559249878, "total_norm": 4.1100640296936035}
14245
+ {"epoch": 19, "step": 14196, "loss": 1.2337658405303955, "total_norm": 3.211822271347046}
14246
+ {"epoch": 19, "step": 14197, "loss": 1.2301338911056519, "total_norm": 3.1758413314819336}
14247
+ {"epoch": 19, "step": 14198, "loss": 1.221261978149414, "total_norm": 3.4750149250030518}
14248
+ {"epoch": 19, "step": 14199, "loss": 1.2557015419006348, "total_norm": 4.0359721183776855}
14249
+ {"epoch": 19, "step": 14200, "loss": 1.3456073999404907, "total_norm": 3.573255777359009}
14250
+ {"epoch": 19, "step": 14201, "loss": 1.4154207706451416, "total_norm": 4.582620620727539}
14251
+ {"epoch": 19, "step": 14202, "loss": 1.2677044868469238, "total_norm": 4.049921035766602}
14252
+ {"epoch": 19, "step": 14203, "loss": 1.402765154838562, "total_norm": 3.363396644592285}
14253
+ {"epoch": 19, "step": 14204, "loss": 1.7865591049194336, "total_norm": 3.305708646774292}
14254
+ {"epoch": 19, "step": 14205, "loss": 1.1999742984771729, "total_norm": 3.124727725982666}
14255
+ {"epoch": 19, "step": 14206, "loss": 1.5784144401550293, "total_norm": 3.4107565879821777}
14256
+ {"epoch": 19, "step": 14207, "loss": 1.4493499994277954, "total_norm": 3.5438122749328613}
14257
+ {"epoch": 19, "step": 14208, "loss": 1.4320155382156372, "total_norm": 3.418236255645752}
14258
+ {"epoch": 19, "step": 14209, "loss": 1.6581913232803345, "total_norm": 3.297144889831543}
14259
+ {"epoch": 19, "step": 14210, "loss": 1.3139245510101318, "total_norm": 4.366439342498779}
14260
+ {"epoch": 19, "step": 14211, "loss": 1.4764115810394287, "total_norm": 4.103246212005615}
14261
+ {"epoch": 19, "step": 14212, "loss": 1.4076372385025024, "total_norm": 2.8685309886932373}
14262
+ {"epoch": 19, "step": 14213, "loss": 1.5355041027069092, "total_norm": 3.560957431793213}
14263
+ {"epoch": 19, "step": 14214, "loss": 1.7394028902053833, "total_norm": 3.791900873184204}
14264
+ {"epoch": 19, "step": 14215, "loss": 1.5689345598220825, "total_norm": 3.5204238891601562}
14265
+ {"epoch": 19, "step": 14216, "loss": 1.4906209707260132, "total_norm": 3.9022459983825684}
14266
+ {"epoch": 19, "step": 14217, "loss": 1.2074592113494873, "total_norm": 3.0134124755859375}
14267
+ {"epoch": 19, "step": 14218, "loss": 1.251356840133667, "total_norm": 3.4126813411712646}
14268
+ {"epoch": 19, "step": 14219, "loss": 1.1172226667404175, "total_norm": 3.1893606185913086}
14269
+ {"epoch": 19, "step": 14220, "loss": 1.1104975938796997, "total_norm": 3.589395046234131}
14270
+ {"epoch": 19, "step": 14221, "loss": 1.6921499967575073, "total_norm": 3.91817045211792}
14271
+ {"epoch": 19, "step": 14222, "loss": 1.489885926246643, "total_norm": 3.376293897628784}
14272
+ {"epoch": 19, "step": 14223, "loss": 0.7472814321517944, "total_norm": 3.1947059631347656}
14273
+ {"epoch": 19, "step": 14224, "loss": 1.4926884174346924, "total_norm": 3.5955045223236084}
14274
+ {"epoch": 19, "step": 14225, "loss": 1.1952569484710693, "total_norm": 3.7403857707977295}
14275
+ {"epoch": 19, "step": 14226, "loss": 1.0996593236923218, "total_norm": 3.132605791091919}
14276
+ {"epoch": 19, "step": 14227, "loss": 1.466225266456604, "total_norm": 3.876025438308716}
14277
+ {"epoch": 19, "step": 14228, "loss": 1.2267988920211792, "total_norm": 3.8541359901428223}
14278
+ {"epoch": 19, "step": 14229, "loss": 1.456079125404358, "total_norm": 5.035787582397461}
14279
+ {"epoch": 19, "step": 14230, "loss": 1.4626939296722412, "total_norm": 3.5663020610809326}
14280
+ {"epoch": 19, "step": 14231, "loss": 1.3957817554473877, "total_norm": 2.940375804901123}
14281
+ {"epoch": 19, "step": 14232, "loss": 1.3818804025650024, "total_norm": 4.608449935913086}
14282
+ {"epoch": 19, "step": 14233, "loss": 1.1981031894683838, "total_norm": 3.8377532958984375}
14283
+ {"epoch": 19, "step": 14234, "loss": 1.3151905536651611, "total_norm": 3.5235235691070557}
14284
+ {"epoch": 19, "step": 14235, "loss": 1.352946162223816, "total_norm": 2.8899879455566406}
14285
+ {"epoch": 19, "step": 14236, "loss": 1.1346720457077026, "total_norm": 3.7353975772857666}
14286
+ {"epoch": 19, "step": 14237, "loss": 1.3318907022476196, "total_norm": 3.2981343269348145}
14287
+ {"epoch": 19, "step": 14238, "loss": 1.2848069667816162, "total_norm": 3.99418568611145}
14288
+ {"epoch": 19, "step": 14239, "loss": 1.2050451040267944, "total_norm": 4.169943332672119}
14289
+ {"epoch": 19, "step": 14240, "loss": 1.1286921501159668, "total_norm": 4.349429130554199}
14290
+ {"epoch": 19, "step": 14241, "loss": 1.6910922527313232, "total_norm": 4.455322265625}
14291
+ {"epoch": 19, "step": 14242, "loss": 1.5331099033355713, "total_norm": 4.076201438903809}
14292
+ {"epoch": 19, "step": 14243, "loss": 1.4278558492660522, "total_norm": 4.205799579620361}
14293
+ {"epoch": 19, "step": 14244, "loss": 1.1146072149276733, "total_norm": 4.290293216705322}
14294
+ {"epoch": 19, "step": 14245, "loss": 1.278074860572815, "total_norm": 3.664496421813965}
14295
+ {"epoch": 19, "step": 14246, "loss": 1.4787036180496216, "total_norm": 2.920013666152954}
14296
+ {"epoch": 19, "step": 14247, "loss": 1.1214327812194824, "total_norm": 3.3388211727142334}
14297
+ {"epoch": 19, "step": 14248, "loss": 1.6130406856536865, "total_norm": 3.2083828449249268}
14298
+ {"epoch": 19, "step": 14249, "loss": 1.6592246294021606, "total_norm": 3.9070985317230225}
14299
+ {"epoch": 19, "step": 14250, "loss": 1.4536032676696777, "total_norm": 3.378190279006958}
14300
+ {"epoch": 19, "step": 14251, "loss": 1.170986294746399, "total_norm": 2.957974672317505}
14301
+ {"epoch": 19, "step": 14252, "loss": 1.2867107391357422, "total_norm": 3.2986881732940674}
14302
+ {"epoch": 19, "step": 14253, "loss": 1.0990557670593262, "total_norm": 3.330235242843628}
14303
+ {"epoch": 19, "step": 14254, "loss": 1.1014639139175415, "total_norm": 3.7158782482147217}
14304
+ {"epoch": 19, "step": 14255, "loss": 1.101760983467102, "total_norm": 4.6914753913879395}
14305
+ {"epoch": 19, "step": 14256, "loss": 1.145251750946045, "total_norm": 3.0548672676086426}
14306
+ {"epoch": 19, "step": 14257, "loss": 1.151097059249878, "total_norm": 3.539501905441284}
14307
+ {"epoch": 19, "step": 14258, "loss": 1.4061405658721924, "total_norm": 3.4499030113220215}
14308
+ {"epoch": 19, "step": 14259, "loss": 1.3993799686431885, "total_norm": 3.1999270915985107}
14309
+ {"epoch": 19, "step": 14260, "loss": 1.3920984268188477, "total_norm": 3.031623125076294}
14310
+ {"epoch": 19, "step": 14261, "loss": 1.2672779560089111, "total_norm": 3.100372791290283}
14311
+ {"epoch": 19, "step": 14262, "loss": 1.2827703952789307, "total_norm": 4.00330114364624}
14312
+ {"epoch": 19, "step": 14263, "loss": 1.5199024677276611, "total_norm": 3.537295341491699}
14313
+ {"epoch": 19, "step": 14264, "loss": 1.0405967235565186, "total_norm": 4.291873455047607}
14314
+ {"epoch": 19, "step": 14265, "loss": 1.5578588247299194, "total_norm": 3.6963629722595215}
14315
+ {"epoch": 19, "step": 14266, "loss": 1.3397408723831177, "total_norm": 4.114141464233398}
14316
+ {"epoch": 19, "step": 14267, "loss": 1.2584844827651978, "total_norm": 3.380841016769409}
14317
+ {"epoch": 19, "step": 14268, "loss": 1.391126036643982, "total_norm": 3.4593210220336914}
14318
+ {"epoch": 19, "step": 14269, "loss": 1.2660913467407227, "total_norm": 3.11002779006958}
14319
+ {"epoch": 19, "step": 14270, "loss": 1.372972846031189, "total_norm": 3.9872734546661377}
14320
+ {"epoch": 19, "step": 14271, "loss": 1.4853533506393433, "total_norm": 2.9548957347869873}
14321
+ {"epoch": 19, "step": 14272, "loss": 1.1514039039611816, "total_norm": 3.5411806106567383}
14322
+ {"epoch": 19, "step": 14273, "loss": 1.4431135654449463, "total_norm": 3.3432867527008057}
14323
+ {"epoch": 19, "step": 14274, "loss": 1.3397294282913208, "total_norm": 4.466766357421875}
14324
+ {"epoch": 19, "step": 14275, "loss": 1.1074498891830444, "total_norm": 3.1441471576690674}
14325
+ {"epoch": 19, "step": 14276, "loss": 1.5825378894805908, "total_norm": 4.013261795043945}
14326
+ {"epoch": 19, "step": 14277, "loss": 1.1250299215316772, "total_norm": 4.25001335144043}
14327
+ {"epoch": 19, "step": 14278, "loss": 1.284100890159607, "total_norm": 3.719804286956787}
14328
+ {"epoch": 19, "step": 14279, "loss": 1.232558250427246, "total_norm": 3.555481195449829}
14329
+ {"epoch": 19, "step": 14280, "loss": 1.2943296432495117, "total_norm": 4.212050914764404}
14330
+ {"epoch": 19, "step": 14281, "loss": 1.5064153671264648, "total_norm": 2.9057021141052246}
14331
+ {"epoch": 19, "step": 14282, "loss": 1.2089567184448242, "total_norm": 4.000471591949463}
14332
+ {"epoch": 19, "step": 14283, "loss": 1.1849435567855835, "total_norm": 2.8257317543029785}
14333
+ {"epoch": 19, "step": 14284, "loss": 1.4365811347961426, "total_norm": 3.371497631072998}
14334
+ {"epoch": 19, "step": 14285, "loss": 1.2322202920913696, "total_norm": 3.542379140853882}
14335
+ {"epoch": 19, "step": 14286, "loss": 1.1453536748886108, "total_norm": 3.482057809829712}
14336
+ {"epoch": 19, "step": 14287, "loss": 1.2765687704086304, "total_norm": 3.9998087882995605}
14337
+ {"epoch": 19, "step": 14288, "loss": 1.18867027759552, "total_norm": 3.6891605854034424}
14338
+ {"epoch": 19, "step": 14289, "loss": 1.2718111276626587, "total_norm": 4.101797580718994}
14339
+ {"epoch": 19, "step": 14290, "loss": 1.0911728143692017, "total_norm": 3.469820261001587}
14340
+ {"epoch": 19, "step": 14291, "loss": 1.1515878438949585, "total_norm": 3.835754871368408}
14341
+ {"epoch": 19, "step": 14292, "loss": 1.4279913902282715, "total_norm": 4.215106010437012}
14342
+ {"epoch": 19, "step": 14293, "loss": 1.5682564973831177, "total_norm": 3.6422460079193115}
14343
+ {"epoch": 19, "step": 14294, "loss": 1.2469456195831299, "total_norm": 3.110564947128296}
14344
+ {"epoch": 19, "step": 14295, "loss": 1.246625304222107, "total_norm": 4.355443477630615}
14345
+ {"epoch": 19, "step": 14296, "loss": 1.165331482887268, "total_norm": 3.630126714706421}
14346
+ {"epoch": 19, "step": 14297, "loss": 1.778786540031433, "total_norm": 3.928060293197632}
14347
+ {"epoch": 19, "step": 14298, "loss": 1.2342469692230225, "total_norm": 3.8362205028533936}
14348
+ {"epoch": 19, "step": 14299, "loss": 1.253922462463379, "total_norm": 3.77811336517334}
14349
+ {"epoch": 19, "step": 14300, "eval_loss": 3.112957239151001, "eval_rougeL": 0.10973326032172814}