Đào Quốc Tuấn commited on
Commit
1ac6276
·
verified ·
1 Parent(s): 83212a8

Upload folder using huggingface_hub

Browse files
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "dtype": "float32",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "transformers_version": "4.56.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.56.0"
6
+ }
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a33bb29e69452a773b8b7a24e3512189dc63e3ab7955cfc7ce3f6b787bff9c
3
+ size 497774208
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
experiments/sft_gpt2-120m/20251117_231920/checkpoints/epoch_19/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251117_231920/sft_gpt2-120m-3.log CHANGED
@@ -560,3 +560,32 @@
560
  2025-11-18 00:09:15,395 - absl - INFO - Using default tokenizer.
561
  2025-11-18 00:09:18,929 - root - INFO - Epoch 19/20 eval loss: 3.511649340391159, eval rougeL: 0.11313993216799487
562
  2025-11-18 00:09:19,670 - root - INFO - Epoch 20/20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
560
  2025-11-18 00:09:15,395 - absl - INFO - Using default tokenizer.
561
  2025-11-18 00:09:18,929 - root - INFO - Epoch 19/20 eval loss: 3.511649340391159, eval rougeL: 0.11313993216799487
562
  2025-11-18 00:09:19,670 - root - INFO - Epoch 20/20
563
+ 2025-11-18 00:09:43,699 - absl - INFO - Using default tokenizer.
564
+ 2025-11-18 00:09:45,654 - root - INFO - Step 6901/7160 train rougeL: 0.11803548722890815
565
+ 2025-11-18 00:09:45,809 - root - INFO - Step 6901/7160 loss: 1.069823145866394, total_norm: 2.2482874393463135
566
+ 2025-11-18 00:10:09,937 - root - INFO - Step 7001/7160 finished
567
+ 2025-11-18 00:10:10,271 - absl - INFO - Using default tokenizer.
568
+ 2025-11-18 00:10:14,817 - absl - INFO - Using default tokenizer.
569
+ 2025-11-18 00:10:19,273 - absl - INFO - Using default tokenizer.
570
+ 2025-11-18 00:10:23,753 - absl - INFO - Using default tokenizer.
571
+ 2025-11-18 00:10:28,307 - absl - INFO - Using default tokenizer.
572
+ 2025-11-18 00:10:32,780 - absl - INFO - Using default tokenizer.
573
+ 2025-11-18 00:10:37,246 - absl - INFO - Using default tokenizer.
574
+ 2025-11-18 00:10:41,648 - absl - INFO - Using default tokenizer.
575
+ 2025-11-18 00:10:45,182 - root - INFO - Epoch 20/20 eval loss: 3.5214541256427765, eval rougeL: 0.11370361682600127
576
+ 2025-11-18 00:10:45,269 - absl - INFO - Using default tokenizer.
577
+ 2025-11-18 00:10:47,242 - root - INFO - Step 7001/7160 train rougeL: 0.15675778332370002
578
+ 2025-11-18 00:10:47,397 - root - INFO - Step 7001/7160 loss: 1.1128922700881958, total_norm: 2.13863205909729
579
+ 2025-11-18 00:11:11,587 - absl - INFO - Using default tokenizer.
580
+ 2025-11-18 00:11:13,573 - root - INFO - Step 7101/7160 train rougeL: 0.15740878852750026
581
+ 2025-11-18 00:11:13,728 - root - INFO - Step 7101/7160 loss: 1.055530309677124, total_norm: 2.0044915676116943
582
+ 2025-11-18 00:11:27,950 - root - INFO - Epoch 20/20 finished
583
+ 2025-11-18 00:11:28,276 - absl - INFO - Using default tokenizer.
584
+ 2025-11-18 00:11:32,792 - absl - INFO - Using default tokenizer.
585
+ 2025-11-18 00:11:37,230 - absl - INFO - Using default tokenizer.
586
+ 2025-11-18 00:11:41,776 - absl - INFO - Using default tokenizer.
587
+ 2025-11-18 00:11:46,583 - absl - INFO - Using default tokenizer.
588
+ 2025-11-18 00:11:51,036 - absl - INFO - Using default tokenizer.
589
+ 2025-11-18 00:11:55,488 - absl - INFO - Using default tokenizer.
590
+ 2025-11-18 00:11:59,893 - absl - INFO - Using default tokenizer.
591
+ 2025-11-18 00:12:03,418 - root - INFO - Epoch 20/20 eval loss: 3.5230754017829895, eval rougeL: 0.11275351405322884
experiments/sft_gpt2-120m/20251117_231920/sft_gpt2-120m-3_metrics.jsonl CHANGED
@@ -6900,3 +6900,296 @@
6900
  {"epoch": 19, "step": 6866, "loss": 0.8937848806381226, "total_norm": 2.082096576690674}
6901
  {"epoch": 19, "step": 6867, "loss": 1.0529240369796753, "total_norm": 2.2550768852233887}
6902
  {"epoch": 19, "step": 6868, "loss": 1.0548574924468994, "total_norm": 2.001913070678711}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6900
  {"epoch": 19, "step": 6866, "loss": 0.8937848806381226, "total_norm": 2.082096576690674}
6901
  {"epoch": 19, "step": 6867, "loss": 1.0529240369796753, "total_norm": 2.2550768852233887}
6902
  {"epoch": 19, "step": 6868, "loss": 1.0548574924468994, "total_norm": 2.001913070678711}
6903
+ {"epoch": 19, "step": 6869, "loss": 0.956845760345459, "total_norm": 1.8884351253509521}
6904
+ {"epoch": 19, "step": 6870, "loss": 0.953325629234314, "total_norm": 2.191408634185791}
6905
+ {"epoch": 19, "step": 6871, "loss": 1.0214215517044067, "total_norm": 2.2155375480651855}
6906
+ {"epoch": 19, "step": 6872, "loss": 0.9833791255950928, "total_norm": 2.110361337661743}
6907
+ {"epoch": 19, "step": 6873, "loss": 0.7849990725517273, "total_norm": 1.9442347288131714}
6908
+ {"epoch": 19, "step": 6874, "loss": 0.9336222410202026, "total_norm": 1.9033899307250977}
6909
+ {"epoch": 19, "step": 6875, "loss": 1.1075769662857056, "total_norm": 1.919631838798523}
6910
+ {"epoch": 19, "step": 6876, "loss": 1.054406762123108, "total_norm": 2.438117265701294}
6911
+ {"epoch": 19, "step": 6877, "loss": 0.7375928163528442, "total_norm": 1.8591747283935547}
6912
+ {"epoch": 19, "step": 6878, "loss": 0.9381473660469055, "total_norm": 2.3920321464538574}
6913
+ {"epoch": 19, "step": 6879, "loss": 1.0483653545379639, "total_norm": 1.9371604919433594}
6914
+ {"epoch": 19, "step": 6880, "loss": 0.8435301780700684, "total_norm": 2.0042455196380615}
6915
+ {"epoch": 19, "step": 6881, "loss": 0.9901301860809326, "total_norm": 2.222156286239624}
6916
+ {"epoch": 19, "step": 6882, "loss": 0.994422972202301, "total_norm": 2.2989017963409424}
6917
+ {"epoch": 19, "step": 6883, "loss": 0.8921369910240173, "total_norm": 2.464988946914673}
6918
+ {"epoch": 19, "step": 6884, "loss": 0.9643175005912781, "total_norm": 2.2248263359069824}
6919
+ {"epoch": 19, "step": 6885, "loss": 1.0734280347824097, "total_norm": 2.202399969100952}
6920
+ {"epoch": 19, "step": 6886, "loss": 1.1113524436950684, "total_norm": 2.1435580253601074}
6921
+ {"epoch": 19, "step": 6887, "loss": 1.1511136293411255, "total_norm": 2.174923896789551}
6922
+ {"epoch": 19, "step": 6888, "loss": 1.0782922506332397, "total_norm": 2.0772650241851807}
6923
+ {"epoch": 19, "step": 6889, "loss": 1.0159008502960205, "total_norm": 2.4715816974639893}
6924
+ {"epoch": 19, "step": 6890, "loss": 0.9337886571884155, "total_norm": 2.1779024600982666}
6925
+ {"epoch": 19, "step": 6891, "loss": 0.9618430137634277, "total_norm": 2.1075124740600586}
6926
+ {"epoch": 19, "step": 6892, "loss": 0.6936129927635193, "total_norm": 2.2419233322143555}
6927
+ {"epoch": 19, "step": 6893, "loss": 1.0113847255706787, "total_norm": 2.076672077178955}
6928
+ {"epoch": 19, "step": 6894, "loss": 0.9418612718582153, "total_norm": 1.9536205530166626}
6929
+ {"epoch": 19, "step": 6895, "loss": 0.8445526361465454, "total_norm": 1.8435953855514526}
6930
+ {"epoch": 19, "step": 6896, "loss": 0.8890517950057983, "total_norm": 2.3285293579101562}
6931
+ {"epoch": 19, "step": 6897, "loss": 0.8605286478996277, "total_norm": 2.403531789779663}
6932
+ {"epoch": 19, "step": 6898, "loss": 0.936241865158081, "total_norm": 2.2462048530578613}
6933
+ {"epoch": 19, "step": 6899, "loss": 0.985892653465271, "total_norm": 2.361196279525757}
6934
+ {"epoch": 19, "step": 6900, "loss": 1.069823145866394, "total_norm": 2.2482874393463135}
6935
+ {"epoch": 19, "step": 6901, "loss": 0.949264407157898, "total_norm": 2.4188482761383057}
6936
+ {"epoch": 19, "step": 6902, "loss": 0.8783237338066101, "total_norm": 2.3609557151794434}
6937
+ {"epoch": 19, "step": 6903, "loss": 1.020238995552063, "total_norm": 2.260701894760132}
6938
+ {"epoch": 19, "step": 6904, "loss": 1.0693256855010986, "total_norm": 2.276893138885498}
6939
+ {"epoch": 19, "step": 6905, "loss": 0.9427741169929504, "total_norm": 2.252544641494751}
6940
+ {"epoch": 19, "step": 6906, "loss": 0.9233540892601013, "total_norm": 1.9294661283493042}
6941
+ {"epoch": 19, "step": 6907, "loss": 1.0738775730133057, "total_norm": 2.1937506198883057}
6942
+ {"epoch": 19, "step": 6908, "loss": 0.947857141494751, "total_norm": 2.7249279022216797}
6943
+ {"epoch": 19, "step": 6909, "loss": 0.9879264831542969, "total_norm": 2.0465290546417236}
6944
+ {"epoch": 19, "step": 6910, "loss": 1.0990848541259766, "total_norm": 2.1441245079040527}
6945
+ {"epoch": 19, "step": 6911, "loss": 0.9513936042785645, "total_norm": 2.3086674213409424}
6946
+ {"epoch": 19, "step": 6912, "loss": 1.040450096130371, "total_norm": 2.037036418914795}
6947
+ {"epoch": 19, "step": 6913, "loss": 0.9788554906845093, "total_norm": 2.106426477432251}
6948
+ {"epoch": 19, "step": 6914, "loss": 0.9122462272644043, "total_norm": 1.6863927841186523}
6949
+ {"epoch": 19, "step": 6915, "loss": 0.8810529708862305, "total_norm": 1.8880096673965454}
6950
+ {"epoch": 19, "step": 6916, "loss": 0.9956991076469421, "total_norm": 2.1052911281585693}
6951
+ {"epoch": 19, "step": 6917, "loss": 0.7554677128791809, "total_norm": 2.2126879692077637}
6952
+ {"epoch": 19, "step": 6918, "loss": 0.8935810923576355, "total_norm": 2.1456363201141357}
6953
+ {"epoch": 19, "step": 6919, "loss": 0.7286329865455627, "total_norm": 2.2539279460906982}
6954
+ {"epoch": 19, "step": 6920, "loss": 0.9911085963249207, "total_norm": 1.983747959136963}
6955
+ {"epoch": 19, "step": 6921, "loss": 0.7945812344551086, "total_norm": 1.9764539003372192}
6956
+ {"epoch": 19, "step": 6922, "loss": 1.0102192163467407, "total_norm": 2.3750619888305664}
6957
+ {"epoch": 19, "step": 6923, "loss": 1.0208792686462402, "total_norm": 2.213378429412842}
6958
+ {"epoch": 19, "step": 6924, "loss": 0.9644502997398376, "total_norm": 2.5459516048431396}
6959
+ {"epoch": 19, "step": 6925, "loss": 0.9470763802528381, "total_norm": 1.8927931785583496}
6960
+ {"epoch": 19, "step": 6926, "loss": 1.0154966115951538, "total_norm": 2.203429937362671}
6961
+ {"epoch": 19, "step": 6927, "loss": 0.9094064235687256, "total_norm": 2.013036012649536}
6962
+ {"epoch": 19, "step": 6928, "loss": 1.1079026460647583, "total_norm": 2.0803496837615967}
6963
+ {"epoch": 19, "step": 6929, "loss": 1.058545470237732, "total_norm": 1.8330177068710327}
6964
+ {"epoch": 19, "step": 6930, "loss": 1.0633176565170288, "total_norm": 2.139163017272949}
6965
+ {"epoch": 19, "step": 6931, "loss": 1.0066580772399902, "total_norm": 2.2102503776550293}
6966
+ {"epoch": 19, "step": 6932, "loss": 1.0006203651428223, "total_norm": 2.446329116821289}
6967
+ {"epoch": 19, "step": 6933, "loss": 0.9236804842948914, "total_norm": 2.237297534942627}
6968
+ {"epoch": 19, "step": 6934, "loss": 0.8919469714164734, "total_norm": 2.0536489486694336}
6969
+ {"epoch": 19, "step": 6935, "loss": 0.7509689331054688, "total_norm": 2.007594108581543}
6970
+ {"epoch": 19, "step": 6936, "loss": 0.8987389206886292, "total_norm": 2.287471294403076}
6971
+ {"epoch": 19, "step": 6937, "loss": 0.8933175802230835, "total_norm": 2.3542613983154297}
6972
+ {"epoch": 19, "step": 6938, "loss": 1.0282896757125854, "total_norm": 1.9594788551330566}
6973
+ {"epoch": 19, "step": 6939, "loss": 0.8692252039909363, "total_norm": 2.225510835647583}
6974
+ {"epoch": 19, "step": 6940, "loss": 1.038893461227417, "total_norm": 2.094167470932007}
6975
+ {"epoch": 19, "step": 6941, "loss": 0.8996456265449524, "total_norm": 2.2807247638702393}
6976
+ {"epoch": 19, "step": 6942, "loss": 0.9856517910957336, "total_norm": 2.219956874847412}
6977
+ {"epoch": 19, "step": 6943, "loss": 0.9309678673744202, "total_norm": 2.151196241378784}
6978
+ {"epoch": 19, "step": 6944, "loss": 0.990230143070221, "total_norm": 1.9134916067123413}
6979
+ {"epoch": 19, "step": 6945, "loss": 0.9145349264144897, "total_norm": 2.4225001335144043}
6980
+ {"epoch": 19, "step": 6946, "loss": 0.9196827411651611, "total_norm": 1.9182929992675781}
6981
+ {"epoch": 19, "step": 6947, "loss": 0.9659733176231384, "total_norm": 2.0149614810943604}
6982
+ {"epoch": 19, "step": 6948, "loss": 0.9372978806495667, "total_norm": 1.915818691253662}
6983
+ {"epoch": 19, "step": 6949, "loss": 1.0568420886993408, "total_norm": 2.6204426288604736}
6984
+ {"epoch": 19, "step": 6950, "loss": 0.7919695377349854, "total_norm": 1.7510161399841309}
6985
+ {"epoch": 19, "step": 6951, "loss": 1.0266350507736206, "total_norm": 2.236429214477539}
6986
+ {"epoch": 19, "step": 6952, "loss": 0.9685720205307007, "total_norm": 2.0537679195404053}
6987
+ {"epoch": 19, "step": 6953, "loss": 0.9450114369392395, "total_norm": 1.9971708059310913}
6988
+ {"epoch": 19, "step": 6954, "loss": 1.142397403717041, "total_norm": 1.818435549736023}
6989
+ {"epoch": 19, "step": 6955, "loss": 0.8478761315345764, "total_norm": 2.23006010055542}
6990
+ {"epoch": 19, "step": 6956, "loss": 1.0589072704315186, "total_norm": 1.9676119089126587}
6991
+ {"epoch": 19, "step": 6957, "loss": 1.0863511562347412, "total_norm": 1.9803886413574219}
6992
+ {"epoch": 19, "step": 6958, "loss": 0.8859145641326904, "total_norm": 2.3226351737976074}
6993
+ {"epoch": 19, "step": 6959, "loss": 0.9547861218452454, "total_norm": 2.2813353538513184}
6994
+ {"epoch": 19, "step": 6960, "loss": 1.0618929862976074, "total_norm": 2.1635935306549072}
6995
+ {"epoch": 19, "step": 6961, "loss": 0.994769811630249, "total_norm": 2.334472417831421}
6996
+ {"epoch": 19, "step": 6962, "loss": 0.9463576674461365, "total_norm": 2.3125622272491455}
6997
+ {"epoch": 19, "step": 6963, "loss": 0.7869423031806946, "total_norm": 2.181838274002075}
6998
+ {"epoch": 19, "step": 6964, "loss": 0.9782577157020569, "total_norm": 1.8346328735351562}
6999
+ {"epoch": 19, "step": 6965, "loss": 1.0985389947891235, "total_norm": 2.5115554332733154}
7000
+ {"epoch": 19, "step": 6966, "loss": 1.0277409553527832, "total_norm": 2.5366673469543457}
7001
+ {"epoch": 19, "step": 6967, "loss": 1.1022998094558716, "total_norm": 2.5560286045074463}
7002
+ {"epoch": 19, "step": 6968, "loss": 1.0439813137054443, "total_norm": 2.3058342933654785}
7003
+ {"epoch": 19, "step": 6969, "loss": 0.8692848086357117, "total_norm": 2.2116386890411377}
7004
+ {"epoch": 19, "step": 6970, "loss": 1.058517336845398, "total_norm": 2.149120569229126}
7005
+ {"epoch": 19, "step": 6971, "loss": 0.9467601180076599, "total_norm": 1.9894059896469116}
7006
+ {"epoch": 19, "step": 6972, "loss": 1.0430189371109009, "total_norm": 1.8023005723953247}
7007
+ {"epoch": 19, "step": 6973, "loss": 0.8603999614715576, "total_norm": 1.7587926387786865}
7008
+ {"epoch": 19, "step": 6974, "loss": 0.970477819442749, "total_norm": 2.0181000232696533}
7009
+ {"epoch": 19, "step": 6975, "loss": 0.9328159093856812, "total_norm": 1.8307887315750122}
7010
+ {"epoch": 19, "step": 6976, "loss": 1.106137752532959, "total_norm": 1.9601081609725952}
7011
+ {"epoch": 19, "step": 6977, "loss": 0.9814282655715942, "total_norm": 2.1260533332824707}
7012
+ {"epoch": 19, "step": 6978, "loss": 0.9663004875183105, "total_norm": 2.0001559257507324}
7013
+ {"epoch": 19, "step": 6979, "loss": 0.9951993227005005, "total_norm": 2.4507079124450684}
7014
+ {"epoch": 19, "step": 6980, "loss": 0.9260803461074829, "total_norm": 1.9259378910064697}
7015
+ {"epoch": 19, "step": 6981, "loss": 0.952734649181366, "total_norm": 1.8264514207839966}
7016
+ {"epoch": 19, "step": 6982, "loss": 0.9772632122039795, "total_norm": 2.3740181922912598}
7017
+ {"epoch": 19, "step": 6983, "loss": 0.9412364363670349, "total_norm": 2.187812089920044}
7018
+ {"epoch": 19, "step": 6984, "loss": 1.09848952293396, "total_norm": 2.0916037559509277}
7019
+ {"epoch": 19, "step": 6985, "loss": 0.764323353767395, "total_norm": 1.8276053667068481}
7020
+ {"epoch": 19, "step": 6986, "loss": 0.817112386226654, "total_norm": 1.8207013607025146}
7021
+ {"epoch": 19, "step": 6987, "loss": 1.0955400466918945, "total_norm": 2.088474988937378}
7022
+ {"epoch": 19, "step": 6988, "loss": 0.9856970310211182, "total_norm": 2.1141703128814697}
7023
+ {"epoch": 19, "step": 6989, "loss": 1.0238953828811646, "total_norm": 2.241731882095337}
7024
+ {"epoch": 19, "step": 6990, "loss": 0.9518187046051025, "total_norm": 2.2154009342193604}
7025
+ {"epoch": 19, "step": 6991, "loss": 1.0417637825012207, "total_norm": 3.1767048835754395}
7026
+ {"epoch": 19, "step": 6992, "loss": 1.1720343828201294, "total_norm": 2.4203708171844482}
7027
+ {"epoch": 19, "step": 6993, "loss": 1.0154680013656616, "total_norm": 1.948460578918457}
7028
+ {"epoch": 19, "step": 6994, "loss": 1.0946097373962402, "total_norm": 2.164571762084961}
7029
+ {"epoch": 19, "step": 6995, "loss": 1.0322253704071045, "total_norm": 1.8893214464187622}
7030
+ {"epoch": 19, "step": 6996, "loss": 0.8894084692001343, "total_norm": 2.4046294689178467}
7031
+ {"epoch": 19, "step": 6997, "loss": 0.8197526931762695, "total_norm": 1.986844539642334}
7032
+ {"epoch": 19, "step": 6998, "loss": 1.1723427772521973, "total_norm": 1.7992804050445557}
7033
+ {"epoch": 19, "step": 6999, "loss": 0.9717972278594971, "total_norm": 2.354714870452881}
7034
+ {"epoch": 19, "step": 7000, "eval_loss": 3.5214541256427765, "eval_rougeL": 0.11370361682600127}
7035
+ {"epoch": 19, "step": 7000, "loss": 1.1128922700881958, "total_norm": 2.13863205909729}
7036
+ {"epoch": 19, "step": 7001, "loss": 0.7528970837593079, "total_norm": 2.5054962635040283}
7037
+ {"epoch": 19, "step": 7002, "loss": 0.9513599872589111, "total_norm": 2.2755606174468994}
7038
+ {"epoch": 19, "step": 7003, "loss": 0.7086869478225708, "total_norm": 2.0153772830963135}
7039
+ {"epoch": 19, "step": 7004, "loss": 0.9868971705436707, "total_norm": 2.0628716945648193}
7040
+ {"epoch": 19, "step": 7005, "loss": 1.0734626054763794, "total_norm": 2.3364882469177246}
7041
+ {"epoch": 19, "step": 7006, "loss": 0.9595001935958862, "total_norm": 1.8835570812225342}
7042
+ {"epoch": 19, "step": 7007, "loss": 0.9325742125511169, "total_norm": 1.9833565950393677}
7043
+ {"epoch": 19, "step": 7008, "loss": 0.9694049954414368, "total_norm": 2.2450594902038574}
7044
+ {"epoch": 19, "step": 7009, "loss": 0.9217339158058167, "total_norm": 2.266529083251953}
7045
+ {"epoch": 19, "step": 7010, "loss": 0.7389083504676819, "total_norm": 2.0624325275421143}
7046
+ {"epoch": 19, "step": 7011, "loss": 0.9662838578224182, "total_norm": 2.648411989212036}
7047
+ {"epoch": 19, "step": 7012, "loss": 0.9563907384872437, "total_norm": 2.4482197761535645}
7048
+ {"epoch": 19, "step": 7013, "loss": 1.0345064401626587, "total_norm": 2.3491077423095703}
7049
+ {"epoch": 19, "step": 7014, "loss": 0.9711717367172241, "total_norm": 2.1522367000579834}
7050
+ {"epoch": 19, "step": 7015, "loss": 1.0831859111785889, "total_norm": 2.526824951171875}
7051
+ {"epoch": 19, "step": 7016, "loss": 1.0964473485946655, "total_norm": 2.261654853820801}
7052
+ {"epoch": 19, "step": 7017, "loss": 0.941961944103241, "total_norm": 2.196976661682129}
7053
+ {"epoch": 19, "step": 7018, "loss": 1.0179048776626587, "total_norm": 1.8604336977005005}
7054
+ {"epoch": 19, "step": 7019, "loss": 1.031291127204895, "total_norm": 2.1300387382507324}
7055
+ {"epoch": 19, "step": 7020, "loss": 0.8688944578170776, "total_norm": 2.065596580505371}
7056
+ {"epoch": 19, "step": 7021, "loss": 1.0722795724868774, "total_norm": 2.16276216506958}
7057
+ {"epoch": 19, "step": 7022, "loss": 0.9276126027107239, "total_norm": 2.26115083694458}
7058
+ {"epoch": 19, "step": 7023, "loss": 0.9316175580024719, "total_norm": 1.8462505340576172}
7059
+ {"epoch": 19, "step": 7024, "loss": 1.0301700830459595, "total_norm": 2.0485646724700928}
7060
+ {"epoch": 19, "step": 7025, "loss": 0.896314263343811, "total_norm": 2.5268728733062744}
7061
+ {"epoch": 19, "step": 7026, "loss": 0.9199168086051941, "total_norm": 2.4221689701080322}
7062
+ {"epoch": 19, "step": 7027, "loss": 0.9362953901290894, "total_norm": 2.1364996433258057}
7063
+ {"epoch": 19, "step": 7028, "loss": 1.0316636562347412, "total_norm": 2.7341268062591553}
7064
+ {"epoch": 19, "step": 7029, "loss": 1.0331467390060425, "total_norm": 2.2750051021575928}
7065
+ {"epoch": 19, "step": 7030, "loss": 0.9868753552436829, "total_norm": 2.299497365951538}
7066
+ {"epoch": 19, "step": 7031, "loss": 1.001501441001892, "total_norm": 2.011687755584717}
7067
+ {"epoch": 19, "step": 7032, "loss": 1.0627403259277344, "total_norm": 2.494314432144165}
7068
+ {"epoch": 19, "step": 7033, "loss": 0.732688844203949, "total_norm": 2.423496723175049}
7069
+ {"epoch": 19, "step": 7034, "loss": 1.011303186416626, "total_norm": 2.5379226207733154}
7070
+ {"epoch": 19, "step": 7035, "loss": 0.9697454571723938, "total_norm": 2.383070468902588}
7071
+ {"epoch": 19, "step": 7036, "loss": 0.9401509165763855, "total_norm": 2.020714282989502}
7072
+ {"epoch": 19, "step": 7037, "loss": 1.0511904954910278, "total_norm": 2.134523868560791}
7073
+ {"epoch": 19, "step": 7038, "loss": 0.9834908843040466, "total_norm": 2.0019540786743164}
7074
+ {"epoch": 19, "step": 7039, "loss": 1.022826075553894, "total_norm": 2.4211764335632324}
7075
+ {"epoch": 19, "step": 7040, "loss": 1.0272319316864014, "total_norm": 2.7690317630767822}
7076
+ {"epoch": 19, "step": 7041, "loss": 1.0156784057617188, "total_norm": 1.8849399089813232}
7077
+ {"epoch": 19, "step": 7042, "loss": 0.9440536499023438, "total_norm": 2.2934746742248535}
7078
+ {"epoch": 19, "step": 7043, "loss": 0.96610426902771, "total_norm": 2.234178066253662}
7079
+ {"epoch": 19, "step": 7044, "loss": 0.8899967670440674, "total_norm": 2.175753593444824}
7080
+ {"epoch": 19, "step": 7045, "loss": 0.8272766470909119, "total_norm": 2.2437081336975098}
7081
+ {"epoch": 19, "step": 7046, "loss": 0.9929255843162537, "total_norm": 2.3534531593322754}
7082
+ {"epoch": 19, "step": 7047, "loss": 0.7814865708351135, "total_norm": 2.4201340675354004}
7083
+ {"epoch": 19, "step": 7048, "loss": 0.8121892809867859, "total_norm": 2.1169636249542236}
7084
+ {"epoch": 19, "step": 7049, "loss": 1.0558141469955444, "total_norm": 2.0264806747436523}
7085
+ {"epoch": 19, "step": 7050, "loss": 0.9145981073379517, "total_norm": 2.293095588684082}
7086
+ {"epoch": 19, "step": 7051, "loss": 0.8409807085990906, "total_norm": 2.253660202026367}
7087
+ {"epoch": 19, "step": 7052, "loss": 0.8930720686912537, "total_norm": 1.9313265085220337}
7088
+ {"epoch": 19, "step": 7053, "loss": 0.8538739085197449, "total_norm": 2.044586420059204}
7089
+ {"epoch": 19, "step": 7054, "loss": 0.8549558520317078, "total_norm": 2.0862460136413574}
7090
+ {"epoch": 19, "step": 7055, "loss": 0.9616589546203613, "total_norm": 2.0946474075317383}
7091
+ {"epoch": 19, "step": 7056, "loss": 1.0670510530471802, "total_norm": 2.1236109733581543}
7092
+ {"epoch": 19, "step": 7057, "loss": 0.9218481779098511, "total_norm": 2.0605156421661377}
7093
+ {"epoch": 19, "step": 7058, "loss": 0.9689083099365234, "total_norm": 1.9805926084518433}
7094
+ {"epoch": 19, "step": 7059, "loss": 0.8964155316352844, "total_norm": 2.216573715209961}
7095
+ {"epoch": 19, "step": 7060, "loss": 1.0813244581222534, "total_norm": 2.400559902191162}
7096
+ {"epoch": 19, "step": 7061, "loss": 1.0182946920394897, "total_norm": 2.009669065475464}
7097
+ {"epoch": 19, "step": 7062, "loss": 0.9501098394393921, "total_norm": 2.2862977981567383}
7098
+ {"epoch": 19, "step": 7063, "loss": 1.036272644996643, "total_norm": 2.0067906379699707}
7099
+ {"epoch": 19, "step": 7064, "loss": 0.9323742985725403, "total_norm": 2.250373363494873}
7100
+ {"epoch": 19, "step": 7065, "loss": 0.9177893996238708, "total_norm": 2.358621835708618}
7101
+ {"epoch": 19, "step": 7066, "loss": 0.9968000650405884, "total_norm": 2.4013137817382812}
7102
+ {"epoch": 19, "step": 7067, "loss": 0.7978839874267578, "total_norm": 1.8765183687210083}
7103
+ {"epoch": 19, "step": 7068, "loss": 0.8304702043533325, "total_norm": 2.0758557319641113}
7104
+ {"epoch": 19, "step": 7069, "loss": 1.0406835079193115, "total_norm": 2.2935354709625244}
7105
+ {"epoch": 19, "step": 7070, "loss": 0.9664809703826904, "total_norm": 2.066673517227173}
7106
+ {"epoch": 19, "step": 7071, "loss": 0.8273698687553406, "total_norm": 2.0826475620269775}
7107
+ {"epoch": 19, "step": 7072, "loss": 0.8763972520828247, "total_norm": 1.8392996788024902}
7108
+ {"epoch": 19, "step": 7073, "loss": 0.7914746999740601, "total_norm": 2.443016767501831}
7109
+ {"epoch": 19, "step": 7074, "loss": 1.0193135738372803, "total_norm": 2.1009511947631836}
7110
+ {"epoch": 19, "step": 7075, "loss": 1.0108946561813354, "total_norm": 2.048048734664917}
7111
+ {"epoch": 19, "step": 7076, "loss": 1.0598605871200562, "total_norm": 2.532162666320801}
7112
+ {"epoch": 19, "step": 7077, "loss": 0.92177414894104, "total_norm": 2.367065906524658}
7113
+ {"epoch": 19, "step": 7078, "loss": 0.9204901456832886, "total_norm": 2.3489468097686768}
7114
+ {"epoch": 19, "step": 7079, "loss": 0.9397857785224915, "total_norm": 2.301786184310913}
7115
+ {"epoch": 19, "step": 7080, "loss": 0.9959754943847656, "total_norm": 2.4565441608428955}
7116
+ {"epoch": 19, "step": 7081, "loss": 0.8819459080696106, "total_norm": 2.1640584468841553}
7117
+ {"epoch": 19, "step": 7082, "loss": 1.0106791257858276, "total_norm": 2.2324161529541016}
7118
+ {"epoch": 19, "step": 7083, "loss": 0.8796620965003967, "total_norm": 2.1432862281799316}
7119
+ {"epoch": 19, "step": 7084, "loss": 1.0346994400024414, "total_norm": 2.334195137023926}
7120
+ {"epoch": 19, "step": 7085, "loss": 0.8665070533752441, "total_norm": 2.455230474472046}
7121
+ {"epoch": 19, "step": 7086, "loss": 0.8724362254142761, "total_norm": 2.190770387649536}
7122
+ {"epoch": 19, "step": 7087, "loss": 0.9135345816612244, "total_norm": 2.0580408573150635}
7123
+ {"epoch": 19, "step": 7088, "loss": 1.0645614862442017, "total_norm": 2.1385843753814697}
7124
+ {"epoch": 19, "step": 7089, "loss": 0.8499649167060852, "total_norm": 2.1588597297668457}
7125
+ {"epoch": 19, "step": 7090, "loss": 0.904139518737793, "total_norm": 2.2303953170776367}
7126
+ {"epoch": 19, "step": 7091, "loss": 0.9921481609344482, "total_norm": 2.140209674835205}
7127
+ {"epoch": 19, "step": 7092, "loss": 0.7803687453269958, "total_norm": 2.0233328342437744}
7128
+ {"epoch": 19, "step": 7093, "loss": 0.8485361337661743, "total_norm": 2.265378952026367}
7129
+ {"epoch": 19, "step": 7094, "loss": 0.8841637969017029, "total_norm": 1.7431678771972656}
7130
+ {"epoch": 19, "step": 7095, "loss": 1.0644142627716064, "total_norm": 2.0698134899139404}
7131
+ {"epoch": 19, "step": 7096, "loss": 0.7221455574035645, "total_norm": 1.967576503753662}
7132
+ {"epoch": 19, "step": 7097, "loss": 0.903188943862915, "total_norm": 2.069768190383911}
7133
+ {"epoch": 19, "step": 7098, "loss": 0.6667285561561584, "total_norm": 2.4725494384765625}
7134
+ {"epoch": 19, "step": 7099, "loss": 1.0064854621887207, "total_norm": 2.176344156265259}
7135
+ {"epoch": 19, "step": 7100, "loss": 1.055530309677124, "total_norm": 2.0044915676116943}
7136
+ {"epoch": 19, "step": 7101, "loss": 0.9249635338783264, "total_norm": 2.6610918045043945}
7137
+ {"epoch": 19, "step": 7102, "loss": 1.1764768362045288, "total_norm": 2.390986204147339}
7138
+ {"epoch": 19, "step": 7103, "loss": 0.9926992654800415, "total_norm": 2.0740933418273926}
7139
+ {"epoch": 19, "step": 7104, "loss": 0.9333260655403137, "total_norm": 1.9224601984024048}
7140
+ {"epoch": 19, "step": 7105, "loss": 0.9996719360351562, "total_norm": 1.9267510175704956}
7141
+ {"epoch": 19, "step": 7106, "loss": 0.8253880739212036, "total_norm": 2.0084424018859863}
7142
+ {"epoch": 19, "step": 7107, "loss": 0.988351047039032, "total_norm": 1.7980711460113525}
7143
+ {"epoch": 19, "step": 7108, "loss": 0.7937043309211731, "total_norm": 2.189807415008545}
7144
+ {"epoch": 19, "step": 7109, "loss": 0.983729362487793, "total_norm": 2.1599135398864746}
7145
+ {"epoch": 19, "step": 7110, "loss": 1.0713106393814087, "total_norm": 2.1843087673187256}
7146
+ {"epoch": 19, "step": 7111, "loss": 0.8900110125541687, "total_norm": 1.912916898727417}
7147
+ {"epoch": 19, "step": 7112, "loss": 0.9128661751747131, "total_norm": 2.0890517234802246}
7148
+ {"epoch": 19, "step": 7113, "loss": 0.9951633810997009, "total_norm": 1.8762894868850708}
7149
+ {"epoch": 19, "step": 7114, "loss": 1.1023377180099487, "total_norm": 2.1743130683898926}
7150
+ {"epoch": 19, "step": 7115, "loss": 1.052733063697815, "total_norm": 2.4147393703460693}
7151
+ {"epoch": 19, "step": 7116, "loss": 1.0395351648330688, "total_norm": 2.0744309425354004}
7152
+ {"epoch": 19, "step": 7117, "loss": 1.001498818397522, "total_norm": 2.1239731311798096}
7153
+ {"epoch": 19, "step": 7118, "loss": 0.9171522855758667, "total_norm": 2.066336154937744}
7154
+ {"epoch": 19, "step": 7119, "loss": 1.0056990385055542, "total_norm": 2.164456844329834}
7155
+ {"epoch": 19, "step": 7120, "loss": 1.1240602731704712, "total_norm": 2.1806423664093018}
7156
+ {"epoch": 19, "step": 7121, "loss": 0.9927192330360413, "total_norm": 2.271285057067871}
7157
+ {"epoch": 19, "step": 7122, "loss": 1.053239345550537, "total_norm": 2.3571248054504395}
7158
+ {"epoch": 19, "step": 7123, "loss": 0.960978090763092, "total_norm": 2.210730791091919}
7159
+ {"epoch": 19, "step": 7124, "loss": 0.9512543678283691, "total_norm": 2.2578303813934326}
7160
+ {"epoch": 19, "step": 7125, "loss": 0.823019802570343, "total_norm": 2.2518928050994873}
7161
+ {"epoch": 19, "step": 7126, "loss": 1.0185447931289673, "total_norm": 2.2135746479034424}
7162
+ {"epoch": 19, "step": 7127, "loss": 1.0319362878799438, "total_norm": 2.3526275157928467}
7163
+ {"epoch": 19, "step": 7128, "loss": 0.9773253798484802, "total_norm": 2.605573892593384}
7164
+ {"epoch": 19, "step": 7129, "loss": 0.9177132248878479, "total_norm": 2.248727321624756}
7165
+ {"epoch": 19, "step": 7130, "loss": 0.7528162002563477, "total_norm": 2.1526403427124023}
7166
+ {"epoch": 19, "step": 7131, "loss": 1.0723429918289185, "total_norm": 2.434816837310791}
7167
+ {"epoch": 19, "step": 7132, "loss": 0.9947962164878845, "total_norm": 2.0023837089538574}
7168
+ {"epoch": 19, "step": 7133, "loss": 0.9768377542495728, "total_norm": 1.8975567817687988}
7169
+ {"epoch": 19, "step": 7134, "loss": 1.0053088665008545, "total_norm": 1.765081524848938}
7170
+ {"epoch": 19, "step": 7135, "loss": 1.0228360891342163, "total_norm": 1.9960452318191528}
7171
+ {"epoch": 19, "step": 7136, "loss": 0.855567991733551, "total_norm": 1.9088129997253418}
7172
+ {"epoch": 19, "step": 7137, "loss": 0.950056254863739, "total_norm": 2.3212618827819824}
7173
+ {"epoch": 19, "step": 7138, "loss": 0.9388453364372253, "total_norm": 2.1661605834960938}
7174
+ {"epoch": 19, "step": 7139, "loss": 0.9138604998588562, "total_norm": 2.063908576965332}
7175
+ {"epoch": 19, "step": 7140, "loss": 0.9172233939170837, "total_norm": 2.2232890129089355}
7176
+ {"epoch": 19, "step": 7141, "loss": 0.9988603591918945, "total_norm": 2.0956342220306396}
7177
+ {"epoch": 19, "step": 7142, "loss": 1.054807424545288, "total_norm": 2.0783443450927734}
7178
+ {"epoch": 19, "step": 7143, "loss": 0.92840576171875, "total_norm": 4.907958984375}
7179
+ {"epoch": 19, "step": 7144, "loss": 0.8940110206604004, "total_norm": 2.4722609519958496}
7180
+ {"epoch": 19, "step": 7145, "loss": 1.0248395204544067, "total_norm": 2.2408180236816406}
7181
+ {"epoch": 19, "step": 7146, "loss": 0.9071243405342102, "total_norm": 2.1310484409332275}
7182
+ {"epoch": 19, "step": 7147, "loss": 0.9536292552947998, "total_norm": 1.9045510292053223}
7183
+ {"epoch": 19, "step": 7148, "loss": 0.9300982356071472, "total_norm": 2.0717697143554688}
7184
+ {"epoch": 19, "step": 7149, "loss": 0.9476808905601501, "total_norm": 1.9408111572265625}
7185
+ {"epoch": 19, "step": 7150, "loss": 1.059009075164795, "total_norm": 2.23828125}
7186
+ {"epoch": 19, "step": 7151, "loss": 1.0218875408172607, "total_norm": 2.5802628993988037}
7187
+ {"epoch": 19, "step": 7152, "loss": 0.8046242594718933, "total_norm": 2.042820930480957}
7188
+ {"epoch": 19, "step": 7153, "loss": 1.047874093055725, "total_norm": 2.604940414428711}
7189
+ {"epoch": 19, "step": 7154, "loss": 0.9886680245399475, "total_norm": 2.090238571166992}
7190
+ {"epoch": 19, "step": 7155, "loss": 0.7961331009864807, "total_norm": 1.9170571565628052}
7191
+ {"epoch": 19, "step": 7156, "loss": 0.9734504818916321, "total_norm": 2.173192262649536}
7192
+ {"epoch": 19, "step": 7157, "loss": 1.0776722431182861, "total_norm": 2.3109467029571533}
7193
+ {"epoch": 19, "step": 7158, "loss": 1.0899198055267334, "total_norm": 2.5292632579803467}
7194
+ {"epoch": 19, "step": 7159, "loss": 0.9745498895645142, "total_norm": 4.019927978515625}
7195
+ {"epoch": 19, "step": 7160, "eval_loss": 3.5230754017829895, "eval_rougeL": 0.11275351405322884}