Đào Quốc Tuấn committed on
Commit
78d97ea
·
verified ·
1 Parent(s): a445a63

Upload folder using huggingface_hub

Browse files
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "dtype": "float32",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "transformers_version": "4.56.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.56.0"
6
+ }
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1feef2970e9106a8583f8204fdc67768969ef064d2925ae375962979a48d608
3
+ size 497774208
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
experiments/sft_gpt2-120m/20251118_153756/checkpoints/epoch_17/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251118_153756/sft_gpt2-120m-1.log CHANGED
@@ -2636,3 +2636,148 @@
2636
  2025-11-18 17:41:29,767 - root - INFO - Step 48701/57180 train rougeL: 0.34799434959939385
2637
  2025-11-18 17:41:29,801 - root - INFO - Step 48701/57180 loss: 0.06674124300479889, total_norm: 1.098740577697754
2638
  2025-11-18 17:41:34,643 - absl - INFO - Using default tokenizer.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2636
  2025-11-18 17:41:29,767 - root - INFO - Step 48701/57180 train rougeL: 0.34799434959939385
2637
  2025-11-18 17:41:29,801 - root - INFO - Step 48701/57180 loss: 0.06674124300479889, total_norm: 1.098740577697754
2638
  2025-11-18 17:41:34,643 - absl - INFO - Using default tokenizer.
2639
+ 2025-11-18 17:41:36,456 - root - INFO - Step 48801/57180 train rougeL: 0.5338936129015467
2640
+ 2025-11-18 17:41:36,491 - root - INFO - Step 48801/57180 loss: 0.02021036297082901, total_norm: 0.37200063467025757
2641
+ 2025-11-18 17:41:41,251 - absl - INFO - Using default tokenizer.
2642
+ 2025-11-18 17:41:43,048 - root - INFO - Step 48901/57180 train rougeL: 0.15424455892890404
2643
+ 2025-11-18 17:41:43,082 - root - INFO - Step 48901/57180 loss: 0.006466233171522617, total_norm: 0.34377801418304443
2644
+ 2025-11-18 17:41:47,814 - root - INFO - Step 49001/57180 finished
2645
+ 2025-11-18 17:41:48,141 - absl - INFO - Using default tokenizer.
2646
+ 2025-11-18 17:41:52,664 - absl - INFO - Using default tokenizer.
2647
+ 2025-11-18 17:41:57,105 - absl - INFO - Using default tokenizer.
2648
+ 2025-11-18 17:42:01,587 - absl - INFO - Using default tokenizer.
2649
+ 2025-11-18 17:42:06,280 - absl - INFO - Using default tokenizer.
2650
+ 2025-11-18 17:42:10,737 - absl - INFO - Using default tokenizer.
2651
+ 2025-11-18 17:42:15,175 - absl - INFO - Using default tokenizer.
2652
+ 2025-11-18 17:42:19,558 - absl - INFO - Using default tokenizer.
2653
+ 2025-11-18 17:42:23,074 - root - INFO - Epoch 18/20 eval loss: 6.251601159572601, eval rougeL: 0.11391684241703591
2654
+ 2025-11-18 17:42:23,090 - absl - INFO - Using default tokenizer.
2655
+ 2025-11-18 17:42:24,873 - root - INFO - Step 49001/57180 train rougeL: 0.12367111898596157
2656
+ 2025-11-18 17:42:24,908 - root - INFO - Step 49001/57180 loss: 0.0027590582612901926, total_norm: 0.3064168393611908
2657
+ 2025-11-18 17:42:29,647 - absl - INFO - Using default tokenizer.
2658
+ 2025-11-18 17:42:31,439 - root - INFO - Step 49101/57180 train rougeL: 0.2591626667587509
2659
+ 2025-11-18 17:42:31,472 - root - INFO - Step 49101/57180 loss: 0.0029959736857563257, total_norm: 0.2954358160495758
2660
+ 2025-11-18 17:42:36,211 - absl - INFO - Using default tokenizer.
2661
+ 2025-11-18 17:42:38,004 - root - INFO - Step 49201/57180 train rougeL: 0.3989785032489062
2662
+ 2025-11-18 17:42:38,038 - root - INFO - Step 49201/57180 loss: 0.00800960324704647, total_norm: 0.20073983073234558
2663
+ 2025-11-18 17:42:42,794 - absl - INFO - Using default tokenizer.
2664
+ 2025-11-18 17:42:44,583 - root - INFO - Step 49301/57180 train rougeL: 0.32575272568659785
2665
+ 2025-11-18 17:42:44,617 - root - INFO - Step 49301/57180 loss: 0.008075157180428505, total_norm: 0.25261831283569336
2666
+ 2025-11-18 17:42:49,369 - absl - INFO - Using default tokenizer.
2667
+ 2025-11-18 17:42:51,152 - root - INFO - Step 49401/57180 train rougeL: 0.38293353541588165
2668
+ 2025-11-18 17:42:51,186 - root - INFO - Step 49401/57180 loss: 0.006646967958658934, total_norm: 0.25455766916275024
2669
+ 2025-11-18 17:42:55,918 - root - INFO - Step 49501/57180 finished
2670
+ 2025-11-18 17:42:56,246 - absl - INFO - Using default tokenizer.
2671
+ 2025-11-18 17:43:00,766 - absl - INFO - Using default tokenizer.
2672
+ 2025-11-18 17:43:05,218 - absl - INFO - Using default tokenizer.
2673
+ 2025-11-18 17:43:09,687 - absl - INFO - Using default tokenizer.
2674
+ 2025-11-18 17:43:14,206 - absl - INFO - Using default tokenizer.
2675
+ 2025-11-18 17:43:18,670 - absl - INFO - Using default tokenizer.
2676
+ 2025-11-18 17:43:23,123 - absl - INFO - Using default tokenizer.
2677
+ 2025-11-18 17:43:27,515 - absl - INFO - Using default tokenizer.
2678
+ 2025-11-18 17:43:31,169 - root - INFO - Epoch 18/20 eval loss: 6.256913363933563, eval rougeL: 0.11355955158754862
2679
+ 2025-11-18 17:43:31,185 - absl - INFO - Using default tokenizer.
2680
+ 2025-11-18 17:43:32,974 - root - INFO - Step 49501/57180 train rougeL: 0.4684565985593804
2681
+ 2025-11-18 17:43:33,008 - root - INFO - Step 49501/57180 loss: 0.019273219630122185, total_norm: 0.5774557590484619
2682
+ 2025-11-18 17:43:37,742 - absl - INFO - Using default tokenizer.
2683
+ 2025-11-18 17:43:39,513 - root - INFO - Step 49601/57180 train rougeL: 0.24085766974138503
2684
+ 2025-11-18 17:43:39,547 - root - INFO - Step 49601/57180 loss: 0.006047072820365429, total_norm: 0.22315384447574615
2685
+ 2025-11-18 17:43:44,291 - absl - INFO - Using default tokenizer.
2686
+ 2025-11-18 17:43:46,068 - root - INFO - Step 49701/57180 train rougeL: 0.41989641173303116
2687
+ 2025-11-18 17:43:46,102 - root - INFO - Step 49701/57180 loss: 0.007980656810104847, total_norm: 0.2539476454257965
2688
+ 2025-11-18 17:43:50,839 - absl - INFO - Using default tokenizer.
2689
+ 2025-11-18 17:43:52,616 - root - INFO - Step 49801/57180 train rougeL: 0.10681268076207678
2690
+ 2025-11-18 17:43:52,650 - root - INFO - Step 49801/57180 loss: 0.0008549483027309179, total_norm: 0.05990957096219063
2691
+ 2025-11-18 17:43:57,386 - absl - INFO - Using default tokenizer.
2692
+ 2025-11-18 17:43:59,164 - root - INFO - Step 49901/57180 train rougeL: 0.1850522722318323
2693
+ 2025-11-18 17:43:59,198 - root - INFO - Step 49901/57180 loss: 0.005856649484485388, total_norm: 0.2896975874900818
2694
+ 2025-11-18 17:44:03,917 - root - INFO - Step 50001/57180 finished
2695
+ 2025-11-18 17:44:04,243 - absl - INFO - Using default tokenizer.
2696
+ 2025-11-18 17:44:08,754 - absl - INFO - Using default tokenizer.
2697
+ 2025-11-18 17:44:13,195 - absl - INFO - Using default tokenizer.
2698
+ 2025-11-18 17:44:17,668 - absl - INFO - Using default tokenizer.
2699
+ 2025-11-18 17:44:22,210 - absl - INFO - Using default tokenizer.
2700
+ 2025-11-18 17:44:26,687 - absl - INFO - Using default tokenizer.
2701
+ 2025-11-18 17:44:31,141 - absl - INFO - Using default tokenizer.
2702
+ 2025-11-18 17:44:35,535 - absl - INFO - Using default tokenizer.
2703
+ 2025-11-18 17:44:39,067 - root - INFO - Epoch 18/20 eval loss: 6.267626345157623, eval rougeL: 0.1134232193755238
2704
+ 2025-11-18 17:44:39,084 - absl - INFO - Using default tokenizer.
2705
+ 2025-11-18 17:44:40,895 - root - INFO - Step 50001/57180 train rougeL: 0.21943553157534842
2706
+ 2025-11-18 17:44:40,930 - root - INFO - Step 50001/57180 loss: 0.008561826311051846, total_norm: 0.2448900043964386
2707
+ 2025-11-18 17:44:45,711 - absl - INFO - Using default tokenizer.
2708
+ 2025-11-18 17:44:47,733 - root - INFO - Step 50101/57180 train rougeL: 0.4082445552374566
2709
+ 2025-11-18 17:44:47,770 - root - INFO - Step 50101/57180 loss: 0.010430201888084412, total_norm: 0.297516405582428
2710
+ 2025-11-18 17:44:52,541 - absl - INFO - Using default tokenizer.
2711
+ 2025-11-18 17:44:54,517 - root - INFO - Step 50201/57180 train rougeL: 0.41006920693934346
2712
+ 2025-11-18 17:44:54,552 - root - INFO - Step 50201/57180 loss: 0.016712741926312447, total_norm: 0.5132530927658081
2713
+ 2025-11-18 17:44:59,311 - absl - INFO - Using default tokenizer.
2714
+ 2025-11-18 17:45:01,099 - root - INFO - Step 50301/57180 train rougeL: 0.45544185826726724
2715
+ 2025-11-18 17:45:01,133 - root - INFO - Step 50301/57180 loss: 0.02175765484571457, total_norm: 0.6634148359298706
2716
+ 2025-11-18 17:45:05,876 - absl - INFO - Using default tokenizer.
2717
+ 2025-11-18 17:45:07,655 - root - INFO - Step 50401/57180 train rougeL: 0.12646313464182748
2718
+ 2025-11-18 17:45:07,688 - root - INFO - Step 50401/57180 loss: 0.02486201375722885, total_norm: 1.3527798652648926
2719
+ 2025-11-18 17:45:12,377 - root - INFO - Step 50501/57180 finished
2720
+ 2025-11-18 17:45:12,703 - absl - INFO - Using default tokenizer.
2721
+ 2025-11-18 17:45:17,213 - absl - INFO - Using default tokenizer.
2722
+ 2025-11-18 17:45:21,647 - absl - INFO - Using default tokenizer.
2723
+ 2025-11-18 17:45:26,139 - absl - INFO - Using default tokenizer.
2724
+ 2025-11-18 17:45:30,655 - absl - INFO - Using default tokenizer.
2725
+ 2025-11-18 17:45:35,119 - absl - INFO - Using default tokenizer.
2726
+ 2025-11-18 17:45:39,567 - absl - INFO - Using default tokenizer.
2727
+ 2025-11-18 17:45:43,945 - absl - INFO - Using default tokenizer.
2728
+ 2025-11-18 17:45:47,457 - root - INFO - Epoch 18/20 eval loss: 6.273983538150787, eval rougeL: 0.1148257422658587
2729
+ 2025-11-18 17:45:47,472 - absl - INFO - Using default tokenizer.
2730
+ 2025-11-18 17:45:49,255 - root - INFO - Step 50501/57180 train rougeL: 0.23590418073471214
2731
+ 2025-11-18 17:45:49,289 - root - INFO - Step 50501/57180 loss: 0.018590204417705536, total_norm: 1.1816843748092651
2732
+ 2025-11-18 17:45:53,989 - absl - INFO - Using default tokenizer.
2733
+ 2025-11-18 17:45:55,786 - root - INFO - Step 50601/57180 train rougeL: 0.5321582170795359
2734
+ 2025-11-18 17:45:55,819 - root - INFO - Step 50601/57180 loss: 0.018219193443655968, total_norm: 0.43898823857307434
2735
+ 2025-11-18 17:46:00,522 - absl - INFO - Using default tokenizer.
2736
+ 2025-11-18 17:46:02,310 - root - INFO - Step 50701/57180 train rougeL: 0.1816877436707009
2737
+ 2025-11-18 17:46:02,344 - root - INFO - Step 50701/57180 loss: 0.003763582557439804, total_norm: 0.14448025822639465
2738
+ 2025-11-18 17:46:07,045 - absl - INFO - Using default tokenizer.
2739
+ 2025-11-18 17:46:08,833 - root - INFO - Step 50801/57180 train rougeL: 0.42169163031776624
2740
+ 2025-11-18 17:46:08,867 - root - INFO - Step 50801/57180 loss: 0.0031370981596410275, total_norm: 0.11518161743879318
2741
+ 2025-11-18 17:46:13,566 - absl - INFO - Using default tokenizer.
2742
+ 2025-11-18 17:46:15,354 - root - INFO - Step 50901/57180 train rougeL: 0.5912828582981644
2743
+ 2025-11-18 17:46:15,388 - root - INFO - Step 50901/57180 loss: 0.03974827006459236, total_norm: 0.5058509111404419
2744
+ 2025-11-18 17:46:20,246 - root - INFO - Step 51001/57180 finished
2745
+ 2025-11-18 17:46:20,579 - absl - INFO - Using default tokenizer.
2746
+ 2025-11-18 17:46:25,078 - absl - INFO - Using default tokenizer.
2747
+ 2025-11-18 17:46:29,501 - absl - INFO - Using default tokenizer.
2748
+ 2025-11-18 17:46:33,998 - absl - INFO - Using default tokenizer.
2749
+ 2025-11-18 17:46:38,516 - absl - INFO - Using default tokenizer.
2750
+ 2025-11-18 17:46:42,958 - absl - INFO - Using default tokenizer.
2751
+ 2025-11-18 17:46:47,396 - absl - INFO - Using default tokenizer.
2752
+ 2025-11-18 17:46:51,768 - absl - INFO - Using default tokenizer.
2753
+ 2025-11-18 17:46:55,280 - root - INFO - Epoch 18/20 eval loss: 6.276162922382355, eval rougeL: 0.11273383990039632
2754
+ 2025-11-18 17:46:55,296 - absl - INFO - Using default tokenizer.
2755
+ 2025-11-18 17:46:57,085 - root - INFO - Step 51001/57180 train rougeL: 0.37787669737485347
2756
+ 2025-11-18 17:46:57,119 - root - INFO - Step 51001/57180 loss: 0.02337406575679779, total_norm: 0.6599317789077759
2757
+ 2025-11-18 17:47:01,824 - absl - INFO - Using default tokenizer.
2758
+ 2025-11-18 17:47:03,607 - root - INFO - Step 51101/57180 train rougeL: 0.28926086991571043
2759
+ 2025-11-18 17:47:03,640 - root - INFO - Step 51101/57180 loss: 0.005989678669720888, total_norm: 0.29516297578811646
2760
+ 2025-11-18 17:47:08,341 - absl - INFO - Using default tokenizer.
2761
+ 2025-11-18 17:47:10,139 - root - INFO - Step 51201/57180 train rougeL: 0.49032163708365817
2762
+ 2025-11-18 17:47:10,173 - root - INFO - Step 51201/57180 loss: 0.03332280367612839, total_norm: 0.7766381502151489
2763
+ 2025-11-18 17:47:14,890 - absl - INFO - Using default tokenizer.
2764
+ 2025-11-18 17:47:16,704 - root - INFO - Step 51301/57180 train rougeL: 0.38884764684991013
2765
+ 2025-11-18 17:47:16,737 - root - INFO - Step 51301/57180 loss: 0.022941358387470245, total_norm: 1.0186429023742676
2766
+ 2025-11-18 17:47:21,487 - absl - INFO - Using default tokenizer.
2767
+ 2025-11-18 17:47:23,282 - root - INFO - Step 51401/57180 train rougeL: 0.27959066764170815
2768
+ 2025-11-18 17:47:23,316 - root - INFO - Step 51401/57180 loss: 0.022026797756552696, total_norm: 0.4584163725376129
2769
+ 2025-11-18 17:47:26,214 - root - INFO - Epoch 18/20 finished
2770
+ 2025-11-18 17:47:26,540 - absl - INFO - Using default tokenizer.
2771
+ 2025-11-18 17:47:31,061 - absl - INFO - Using default tokenizer.
2772
+ 2025-11-18 17:47:35,510 - absl - INFO - Using default tokenizer.
2773
+ 2025-11-18 17:47:39,994 - absl - INFO - Using default tokenizer.
2774
+ 2025-11-18 17:47:44,516 - absl - INFO - Using default tokenizer.
2775
+ 2025-11-18 17:47:49,168 - absl - INFO - Using default tokenizer.
2776
+ 2025-11-18 17:47:53,617 - absl - INFO - Using default tokenizer.
2777
+ 2025-11-18 17:47:57,995 - absl - INFO - Using default tokenizer.
2778
+ 2025-11-18 17:48:01,511 - root - INFO - Epoch 18/20 eval loss: 6.274387300014496, eval rougeL: 0.11338182900446833
2779
+ 2025-11-18 17:48:02,211 - root - INFO - Epoch 19/20
2780
+ 2025-11-18 17:48:04,032 - root - INFO - Step 51501/57180 finished
2781
+ 2025-11-18 17:48:04,417 - absl - INFO - Using default tokenizer.
2782
+ 2025-11-18 17:48:08,994 - absl - INFO - Using default tokenizer.
2783
+ 2025-11-18 17:48:13,488 - absl - INFO - Using default tokenizer.
experiments/sft_gpt2-120m/20251118_153756/sft_gpt2-120m-1_metrics.jsonl CHANGED
The diff for this file is too large to render. See raw diff