Đào Quốc Tuấn committed on
Commit
0a9c8b6
·
verified ·
1 Parent(s): d7aae54

Upload folder using huggingface_hub

Browse files
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "dtype": "float32",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "transformers_version": "4.56.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.56.0"
6
+ }
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b87253f4634d5237a1e5ec9c660b56fc58cd5d7067e45d73a367654c4ce8decf
3
+ size 497774208
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
experiments/sft_gpt2-120m/20251118_113949/checkpoints/epoch_19/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251118_113949/sft_gpt2-120m.log CHANGED
@@ -388,3 +388,26 @@
388
  2025-11-18 12:23:12,710 - absl - INFO - Using default tokenizer.
389
  2025-11-18 12:23:16,231 - root - INFO - Epoch 19/20 eval loss: 5.02036452293396, eval rougeL: 0.11912210136407948
390
  2025-11-18 12:23:16,820 - root - INFO - Epoch 20/20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  2025-11-18 12:23:12,710 - absl - INFO - Using default tokenizer.
389
  2025-11-18 12:23:16,231 - root - INFO - Epoch 19/20 eval loss: 5.02036452293396, eval rougeL: 0.11912210136407948
390
  2025-11-18 12:23:16,820 - root - INFO - Epoch 20/20
391
+ 2025-11-18 12:24:02,673 - root - INFO - Step 3501/3580 finished
392
+ 2025-11-18 12:24:02,999 - absl - INFO - Using default tokenizer.
393
+ 2025-11-18 12:24:07,521 - absl - INFO - Using default tokenizer.
394
+ 2025-11-18 12:24:11,952 - absl - INFO - Using default tokenizer.
395
+ 2025-11-18 12:24:16,444 - absl - INFO - Using default tokenizer.
396
+ 2025-11-18 12:24:20,974 - absl - INFO - Using default tokenizer.
397
+ 2025-11-18 12:24:25,472 - absl - INFO - Using default tokenizer.
398
+ 2025-11-18 12:24:29,970 - absl - INFO - Using default tokenizer.
399
+ 2025-11-18 12:24:34,543 - absl - INFO - Using default tokenizer.
400
+ 2025-11-18 12:24:38,045 - root - INFO - Epoch 20/20 eval loss: 5.0220290422439575, eval rougeL: 0.11872779423918271
401
+ 2025-11-18 12:24:38,212 - absl - INFO - Using default tokenizer.
402
+ 2025-11-18 12:24:40,792 - root - INFO - Step 3501/3580 train rougeL: 0.36769257417936035
403
+ 2025-11-18 12:24:41,082 - root - INFO - Step 3501/3580 loss: 0.07207430899143219, total_norm: 0.3702149987220764
404
+ 2025-11-18 12:25:17,444 - root - INFO - Epoch 20/20 finished
405
+ 2025-11-18 12:25:17,771 - absl - INFO - Using default tokenizer.
406
+ 2025-11-18 12:25:22,270 - absl - INFO - Using default tokenizer.
407
+ 2025-11-18 12:25:26,738 - absl - INFO - Using default tokenizer.
408
+ 2025-11-18 12:25:31,201 - absl - INFO - Using default tokenizer.
409
+ 2025-11-18 12:25:35,716 - absl - INFO - Using default tokenizer.
410
+ 2025-11-18 12:25:40,175 - absl - INFO - Using default tokenizer.
411
+ 2025-11-18 12:25:44,623 - absl - INFO - Using default tokenizer.
412
+ 2025-11-18 12:25:49,008 - absl - INFO - Using default tokenizer.
413
+ 2025-11-18 12:25:52,532 - root - INFO - Epoch 20/20 eval loss: 5.02215451002121, eval rougeL: 0.11894351165818984
experiments/sft_gpt2-120m/20251118_113949/sft_gpt2-120m_metrics.jsonl CHANGED
@@ -3455,3 +3455,154 @@
3455
  {"epoch": 19, "step": 3428, "loss": 0.06631392240524292, "total_norm": 0.38597530126571655}
3456
  {"epoch": 19, "step": 3429, "loss": 0.06341607123613358, "total_norm": 0.39210784435272217}
3457
  {"epoch": 19, "step": 3430, "loss": 0.07612670958042145, "total_norm": 0.46375593543052673}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3455
  {"epoch": 19, "step": 3428, "loss": 0.06631392240524292, "total_norm": 0.38597530126571655}
3456
  {"epoch": 19, "step": 3429, "loss": 0.06341607123613358, "total_norm": 0.39210784435272217}
3457
  {"epoch": 19, "step": 3430, "loss": 0.07612670958042145, "total_norm": 0.46375593543052673}
3458
+ {"epoch": 19, "step": 3431, "loss": 0.06649475544691086, "total_norm": 0.40509912371635437}
3459
+ {"epoch": 19, "step": 3432, "loss": 0.06405585259199142, "total_norm": 0.3777313530445099}
3460
+ {"epoch": 19, "step": 3433, "loss": 0.07823031395673752, "total_norm": 0.44407787919044495}
3461
+ {"epoch": 19, "step": 3434, "loss": 0.07584518194198608, "total_norm": 0.4376632571220398}
3462
+ {"epoch": 19, "step": 3435, "loss": 0.06603962182998657, "total_norm": 0.37467437982559204}
3463
+ {"epoch": 19, "step": 3436, "loss": 0.06879804283380508, "total_norm": 0.36169669032096863}
3464
+ {"epoch": 19, "step": 3437, "loss": 0.0623231902718544, "total_norm": 0.35651248693466187}
3465
+ {"epoch": 19, "step": 3438, "loss": 0.06521435081958771, "total_norm": 0.3765014410018921}
3466
+ {"epoch": 19, "step": 3439, "loss": 0.06612592935562134, "total_norm": 0.398922860622406}
3467
+ {"epoch": 19, "step": 3440, "loss": 0.07783536612987518, "total_norm": 0.46882691979408264}
3468
+ {"epoch": 19, "step": 3441, "loss": 0.08135991543531418, "total_norm": 0.4426175355911255}
3469
+ {"epoch": 19, "step": 3442, "loss": 0.06669371575117111, "total_norm": 0.385215699672699}
3470
+ {"epoch": 19, "step": 3443, "loss": 0.07437398284673691, "total_norm": 0.3940950930118561}
3471
+ {"epoch": 19, "step": 3444, "loss": 0.06909912824630737, "total_norm": 0.43716415762901306}
3472
+ {"epoch": 19, "step": 3445, "loss": 0.06889934092760086, "total_norm": 0.3929239809513092}
3473
+ {"epoch": 19, "step": 3446, "loss": 0.0719086155295372, "total_norm": 0.3722221851348877}
3474
+ {"epoch": 19, "step": 3447, "loss": 0.07598002254962921, "total_norm": 0.4522363543510437}
3475
+ {"epoch": 19, "step": 3448, "loss": 0.06892066448926926, "total_norm": 0.45051389932632446}
3476
+ {"epoch": 19, "step": 3449, "loss": 0.060814905911684036, "total_norm": 0.3825909197330475}
3477
+ {"epoch": 19, "step": 3450, "loss": 0.06693698465824127, "total_norm": 0.46046268939971924}
3478
+ {"epoch": 19, "step": 3451, "loss": 0.07806388288736343, "total_norm": 0.35896995663642883}
3479
+ {"epoch": 19, "step": 3452, "loss": 0.07569962739944458, "total_norm": 0.3824521601200104}
3480
+ {"epoch": 19, "step": 3453, "loss": 0.07433344423770905, "total_norm": 0.43837347626686096}
3481
+ {"epoch": 19, "step": 3454, "loss": 0.07127473503351212, "total_norm": 0.3958043158054352}
3482
+ {"epoch": 19, "step": 3455, "loss": 0.07177051156759262, "total_norm": 0.44299301505088806}
3483
+ {"epoch": 19, "step": 3456, "loss": 0.06487474590539932, "total_norm": 0.40540966391563416}
3484
+ {"epoch": 19, "step": 3457, "loss": 0.06358999013900757, "total_norm": 0.3553120493888855}
3485
+ {"epoch": 19, "step": 3458, "loss": 0.07439502328634262, "total_norm": 0.4041728973388672}
3486
+ {"epoch": 19, "step": 3459, "loss": 0.06909509748220444, "total_norm": 0.38303014636039734}
3487
+ {"epoch": 19, "step": 3460, "loss": 0.06487569212913513, "total_norm": 0.4316783845424652}
3488
+ {"epoch": 19, "step": 3461, "loss": 0.06462734192609787, "total_norm": 0.39192458987236023}
3489
+ {"epoch": 19, "step": 3462, "loss": 0.06951399892568588, "total_norm": 0.43483564257621765}
3490
+ {"epoch": 19, "step": 3463, "loss": 0.06610497087240219, "total_norm": 0.4542086124420166}
3491
+ {"epoch": 19, "step": 3464, "loss": 0.07372887432575226, "total_norm": 0.4204694926738739}
3492
+ {"epoch": 19, "step": 3465, "loss": 0.08062244951725006, "total_norm": 0.4212283790111542}
3493
+ {"epoch": 19, "step": 3466, "loss": 0.06950519979000092, "total_norm": 0.37755486369132996}
3494
+ {"epoch": 19, "step": 3467, "loss": 0.07102422416210175, "total_norm": 0.4094632565975189}
3495
+ {"epoch": 19, "step": 3468, "loss": 0.060225386172533035, "total_norm": 0.3812105655670166}
3496
+ {"epoch": 19, "step": 3469, "loss": 0.06577896326780319, "total_norm": 0.4254424571990967}
3497
+ {"epoch": 19, "step": 3470, "loss": 0.06463371217250824, "total_norm": 0.40942075848579407}
3498
+ {"epoch": 19, "step": 3471, "loss": 0.07263350486755371, "total_norm": 0.3554999530315399}
3499
+ {"epoch": 19, "step": 3472, "loss": 0.07966326177120209, "total_norm": 0.4493215084075928}
3500
+ {"epoch": 19, "step": 3473, "loss": 0.06410136073827744, "total_norm": 0.3939339220523834}
3501
+ {"epoch": 19, "step": 3474, "loss": 0.07636649906635284, "total_norm": 0.4016420841217041}
3502
+ {"epoch": 19, "step": 3475, "loss": 0.07257033884525299, "total_norm": 0.4046069383621216}
3503
+ {"epoch": 19, "step": 3476, "loss": 0.07237204909324646, "total_norm": 0.39212438464164734}
3504
+ {"epoch": 19, "step": 3477, "loss": 0.06761867552995682, "total_norm": 0.42311716079711914}
3505
+ {"epoch": 19, "step": 3478, "loss": 0.06813068687915802, "total_norm": 0.37189480662345886}
3506
+ {"epoch": 19, "step": 3479, "loss": 0.07458849251270294, "total_norm": 0.3864719867706299}
3507
+ {"epoch": 19, "step": 3480, "loss": 0.06934340298175812, "total_norm": 0.4600399136543274}
3508
+ {"epoch": 19, "step": 3481, "loss": 0.06718819588422775, "total_norm": 0.4141007959842682}
3509
+ {"epoch": 19, "step": 3482, "loss": 0.06236109510064125, "total_norm": 0.36711663007736206}
3510
+ {"epoch": 19, "step": 3483, "loss": 0.06310893595218658, "total_norm": 0.42796778678894043}
3511
+ {"epoch": 19, "step": 3484, "loss": 0.07416108250617981, "total_norm": 0.4565519094467163}
3512
+ {"epoch": 19, "step": 3485, "loss": 0.06560900062322617, "total_norm": 0.3995119333267212}
3513
+ {"epoch": 19, "step": 3486, "loss": 0.06989232450723648, "total_norm": 0.4169926941394806}
3514
+ {"epoch": 19, "step": 3487, "loss": 0.07123791426420212, "total_norm": 0.39384472370147705}
3515
+ {"epoch": 19, "step": 3488, "loss": 0.07095292210578918, "total_norm": 0.4068931043148041}
3516
+ {"epoch": 19, "step": 3489, "loss": 0.06878367066383362, "total_norm": 0.41394373774528503}
3517
+ {"epoch": 19, "step": 3490, "loss": 0.06557326763868332, "total_norm": 0.41651469469070435}
3518
+ {"epoch": 19, "step": 3491, "loss": 0.07315421104431152, "total_norm": 0.4758340120315552}
3519
+ {"epoch": 19, "step": 3492, "loss": 0.07345067709684372, "total_norm": 0.3815555274486542}
3520
+ {"epoch": 19, "step": 3493, "loss": 0.07233898341655731, "total_norm": 0.4131311774253845}
3521
+ {"epoch": 19, "step": 3494, "loss": 0.07435332238674164, "total_norm": 0.41949114203453064}
3522
+ {"epoch": 19, "step": 3495, "loss": 0.06814467161893845, "total_norm": 0.387387752532959}
3523
+ {"epoch": 19, "step": 3496, "loss": 0.07183708995580673, "total_norm": 0.3951456844806671}
3524
+ {"epoch": 19, "step": 3497, "loss": 0.07346130162477493, "total_norm": 0.3674578368663788}
3525
+ {"epoch": 19, "step": 3498, "loss": 0.06627894937992096, "total_norm": 0.3926956057548523}
3526
+ {"epoch": 19, "step": 3499, "loss": 0.0776500403881073, "total_norm": 0.4104427695274353}
3527
+ {"epoch": 19, "step": 3500, "eval_loss": 5.0220290422439575, "eval_rougeL": 0.11872779423918271}
3528
+ {"epoch": 19, "step": 3500, "loss": 0.07207430899143219, "total_norm": 0.3702149987220764}
3529
+ {"epoch": 19, "step": 3501, "loss": 0.06158837303519249, "total_norm": 0.36664992570877075}
3530
+ {"epoch": 19, "step": 3502, "loss": 0.07010766118764877, "total_norm": 0.37997540831565857}
3531
+ {"epoch": 19, "step": 3503, "loss": 0.07500073313713074, "total_norm": 0.41642364859580994}
3532
+ {"epoch": 19, "step": 3504, "loss": 0.05905424803495407, "total_norm": 0.38566330075263977}
3533
+ {"epoch": 19, "step": 3505, "loss": 0.06727441400289536, "total_norm": 0.38475751876831055}
3534
+ {"epoch": 19, "step": 3506, "loss": 0.060138337314128876, "total_norm": 0.413043737411499}
3535
+ {"epoch": 19, "step": 3507, "loss": 0.06197485700249672, "total_norm": 0.3413826525211334}
3536
+ {"epoch": 19, "step": 3508, "loss": 0.06812082231044769, "total_norm": 0.38554421067237854}
3537
+ {"epoch": 19, "step": 3509, "loss": 0.0738016664981842, "total_norm": 0.45579251646995544}
3538
+ {"epoch": 19, "step": 3510, "loss": 0.06546374410390854, "total_norm": 0.3662172853946686}
3539
+ {"epoch": 19, "step": 3511, "loss": 0.06208968162536621, "total_norm": 0.39762791991233826}
3540
+ {"epoch": 19, "step": 3512, "loss": 0.07024646550416946, "total_norm": 0.5137607455253601}
3541
+ {"epoch": 19, "step": 3513, "loss": 0.07134389132261276, "total_norm": 0.4670751094818115}
3542
+ {"epoch": 19, "step": 3514, "loss": 0.07728653401136398, "total_norm": 0.3857883810997009}
3543
+ {"epoch": 19, "step": 3515, "loss": 0.06422069668769836, "total_norm": 0.4053337275981903}
3544
+ {"epoch": 19, "step": 3516, "loss": 0.06729034334421158, "total_norm": 0.38059550523757935}
3545
+ {"epoch": 19, "step": 3517, "loss": 0.07859373092651367, "total_norm": 0.48532700538635254}
3546
+ {"epoch": 19, "step": 3518, "loss": 0.060003913938999176, "total_norm": 0.35765382647514343}
3547
+ {"epoch": 19, "step": 3519, "loss": 0.07169795781373978, "total_norm": 0.3875722885131836}
3548
+ {"epoch": 19, "step": 3520, "loss": 0.06319695711135864, "total_norm": 0.3974921703338623}
3549
+ {"epoch": 19, "step": 3521, "loss": 0.07936239242553711, "total_norm": 0.4573519229888916}
3550
+ {"epoch": 19, "step": 3522, "loss": 0.06578812748193741, "total_norm": 0.39051687717437744}
3551
+ {"epoch": 19, "step": 3523, "loss": 0.06445008516311646, "total_norm": 0.39269962906837463}
3552
+ {"epoch": 19, "step": 3524, "loss": 0.07205944508314133, "total_norm": 0.3946966826915741}
3553
+ {"epoch": 19, "step": 3525, "loss": 0.06935378164052963, "total_norm": 0.40876826643943787}
3554
+ {"epoch": 19, "step": 3526, "loss": 0.06792822480201721, "total_norm": 0.39337751269340515}
3555
+ {"epoch": 19, "step": 3527, "loss": 0.07395108789205551, "total_norm": 0.4483739733695984}
3556
+ {"epoch": 19, "step": 3528, "loss": 0.06934802234172821, "total_norm": 0.3965384066104889}
3557
+ {"epoch": 19, "step": 3529, "loss": 0.0827133059501648, "total_norm": 0.44291070103645325}
3558
+ {"epoch": 19, "step": 3530, "loss": 0.07297205179929733, "total_norm": 0.47151389718055725}
3559
+ {"epoch": 19, "step": 3531, "loss": 0.06897492706775665, "total_norm": 0.3717750012874603}
3560
+ {"epoch": 19, "step": 3532, "loss": 0.06802594661712646, "total_norm": 0.3665638864040375}
3561
+ {"epoch": 19, "step": 3533, "loss": 0.07631628960371017, "total_norm": 0.4539519250392914}
3562
+ {"epoch": 19, "step": 3534, "loss": 0.06562040001153946, "total_norm": 0.3907945156097412}
3563
+ {"epoch": 19, "step": 3535, "loss": 0.07131240516901016, "total_norm": 0.40537747740745544}
3564
+ {"epoch": 19, "step": 3536, "loss": 0.06623981148004532, "total_norm": 0.37212270498275757}
3565
+ {"epoch": 19, "step": 3537, "loss": 0.05907493829727173, "total_norm": 0.3867167532444}
3566
+ {"epoch": 19, "step": 3538, "loss": 0.060727182775735855, "total_norm": 0.3760104775428772}
3567
+ {"epoch": 19, "step": 3539, "loss": 0.06602499634027481, "total_norm": 0.3716023564338684}
3568
+ {"epoch": 19, "step": 3540, "loss": 0.0792287215590477, "total_norm": 0.4493481516838074}
3569
+ {"epoch": 19, "step": 3541, "loss": 0.0659322738647461, "total_norm": 0.366845041513443}
3570
+ {"epoch": 19, "step": 3542, "loss": 0.06888958811759949, "total_norm": 0.38213247060775757}
3571
+ {"epoch": 19, "step": 3543, "loss": 0.07498490065336227, "total_norm": 0.39568865299224854}
3572
+ {"epoch": 19, "step": 3544, "loss": 0.06685784459114075, "total_norm": 0.3744715750217438}
3573
+ {"epoch": 19, "step": 3545, "loss": 0.06664440035820007, "total_norm": 0.40962454676628113}
3574
+ {"epoch": 19, "step": 3546, "loss": 0.06505069881677628, "total_norm": 0.3674473464488983}
3575
+ {"epoch": 19, "step": 3547, "loss": 0.06825530529022217, "total_norm": 0.42193111777305603}
3576
+ {"epoch": 19, "step": 3548, "loss": 0.07147165387868881, "total_norm": 0.4118800759315491}
3577
+ {"epoch": 19, "step": 3549, "loss": 0.08126255124807358, "total_norm": 0.4317931532859802}
3578
+ {"epoch": 19, "step": 3550, "loss": 0.07912220060825348, "total_norm": 0.4415907561779022}
3579
+ {"epoch": 19, "step": 3551, "loss": 0.07138849794864655, "total_norm": 0.3974316418170929}
3580
+ {"epoch": 19, "step": 3552, "loss": 0.07288537174463272, "total_norm": 0.5247356295585632}
3581
+ {"epoch": 19, "step": 3553, "loss": 0.0735028013586998, "total_norm": 0.42138493061065674}
3582
+ {"epoch": 19, "step": 3554, "loss": 0.07932400703430176, "total_norm": 0.4254576563835144}
3583
+ {"epoch": 19, "step": 3555, "loss": 0.0751042515039444, "total_norm": 0.4099126160144806}
3584
+ {"epoch": 19, "step": 3556, "loss": 0.0627933219075203, "total_norm": 0.3664637804031372}
3585
+ {"epoch": 19, "step": 3557, "loss": 0.06501071900129318, "total_norm": 0.3542080521583557}
3586
+ {"epoch": 19, "step": 3558, "loss": 0.07052960246801376, "total_norm": 0.44213640689849854}
3587
+ {"epoch": 19, "step": 3559, "loss": 0.06646731495857239, "total_norm": 0.396994024515152}
3588
+ {"epoch": 19, "step": 3560, "loss": 0.059404127299785614, "total_norm": 0.3919144570827484}
3589
+ {"epoch": 19, "step": 3561, "loss": 0.0678473636507988, "total_norm": 0.3929302990436554}
3590
+ {"epoch": 19, "step": 3562, "loss": 0.06528989225625992, "total_norm": 0.40572378039360046}
3591
+ {"epoch": 19, "step": 3563, "loss": 0.06991353631019592, "total_norm": 0.38744571805000305}
3592
+ {"epoch": 19, "step": 3564, "loss": 0.0819733589887619, "total_norm": 0.3923647999763489}
3593
+ {"epoch": 19, "step": 3565, "loss": 0.0787283182144165, "total_norm": 0.43464845418930054}
3594
+ {"epoch": 19, "step": 3566, "loss": 0.07298606634140015, "total_norm": 0.43444889783859253}
3595
+ {"epoch": 19, "step": 3567, "loss": 0.05901186913251877, "total_norm": 0.4918997287750244}
3596
+ {"epoch": 19, "step": 3568, "loss": 0.07650952786207199, "total_norm": 0.4300938844680786}
3597
+ {"epoch": 19, "step": 3569, "loss": 0.07255873084068298, "total_norm": 0.4154261350631714}
3598
+ {"epoch": 19, "step": 3570, "loss": 0.07922214269638062, "total_norm": 0.41404852271080017}
3599
+ {"epoch": 19, "step": 3571, "loss": 0.068200021982193, "total_norm": 0.4082469940185547}
3600
+ {"epoch": 19, "step": 3572, "loss": 0.07221460342407227, "total_norm": 0.4273666739463806}
3601
+ {"epoch": 19, "step": 3573, "loss": 0.07680253684520721, "total_norm": 0.47549837827682495}
3602
+ {"epoch": 19, "step": 3574, "loss": 0.0700637474656105, "total_norm": 0.3978300392627716}
3603
+ {"epoch": 19, "step": 3575, "loss": 0.06955338269472122, "total_norm": 0.41550689935684204}
3604
+ {"epoch": 19, "step": 3576, "loss": 0.07201962172985077, "total_norm": 0.3722953498363495}
3605
+ {"epoch": 19, "step": 3577, "loss": 0.07467301934957504, "total_norm": 0.444408118724823}
3606
+ {"epoch": 19, "step": 3578, "loss": 0.06600163131952286, "total_norm": 0.4090379774570465}
3607
+ {"epoch": 19, "step": 3579, "loss": 0.052284255623817444, "total_norm": 0.4305051267147064}
3608
+ {"epoch": 19, "step": 3580, "eval_loss": 5.02215451002121, "eval_rougeL": 0.11894351165818984}