Training in progress, step 1084, checkpoint

Files changed (7) hide show

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "up_proj",
     "v_proj",
-    "k_proj",
     "gate_proj",
-    "o_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "o_proj",
     "v_proj",
     "gate_proj",
+    "down_proj",
+    "q_proj",
+    "up_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c99f0910b1d464cc9dd34ad013a78fc83c4e0e247274aa55f8ab30adb94fafd5
 size 1826969312

 version https://git-lfs.github.com/spec/v1
+oid sha256:e945ecb1105ccbd7db50609dd1debf061390155256fb3e7e171d4e4ff5c5cb3f
 size 1826969312

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e232076aca1d3fcd682dcf1388b713a0cf363f3d1c8f2c12a286c416a372802
 size 917657642

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e7608df305ea5de432c6e9824330a963c962321810e1114f34ce97e8baf43cd
 size 917657642

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac0420f99ddd52aad9a6f9534d329d57d42e52c6afff3a202493d8fac4921ba2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e14bf956cf6d8fbb7489f32fd451c5aa20421badd8b0dbc258528eb49d130239
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5204f9ec8b9d069eef9619035e1e46f89a9b2b30dc326e967f107c3b83b63f3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:50942b3c969aa466776eba40c03ac32e185a5fec3820381b12aaaffc91809f9f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.5745736360549927,
   "best_model_checkpoint": "/checkpoints/gemma2-27b-biomedical/checkpoint-500",
-  "epoch": 1.8445930366612866,
   "eval_steps": 100,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -437,6 +437,34 @@
       "eval_samples_per_second": 0.685,
       "eval_steps_per_second": 0.685,
       "step": 1000
     }
   ],
   "logging_steps": 20,
@@ -451,12 +479,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.605468946006016e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.5745736360549927,
   "best_model_checkpoint": "/checkpoints/gemma2-27b-biomedical/checkpoint-500",
+  "epoch": 1.9997694258704173,
   "eval_steps": 100,
+  "global_step": 1084,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.685,
       "eval_steps_per_second": 0.685,
       "step": 1000
+    },
+    {
+      "epoch": 1.8817154715240951,
+      "grad_norm": 0.5792846083641052,
+      "learning_rate": 1.824312967136299e-06,
+      "loss": 1.1208,
+      "step": 1020
+    },
+    {
+      "epoch": 1.9186073322573207,
+      "grad_norm": 0.6999746561050415,
+      "learning_rate": 8.636614587030356e-07,
+      "loss": 1.1133,
+      "step": 1040
+    },
+    {
+      "epoch": 1.9554991929905463,
+      "grad_norm": 0.6022835373878479,
+      "learning_rate": 2.572175119379683e-07,
+      "loss": 1.1469,
+      "step": 1060
+    },
+    {
+      "epoch": 1.9923910537237721,
+      "grad_norm": 0.6285136342048645,
+      "learning_rate": 7.1479108360916754e-09,
+      "loss": 1.1502,
+      "step": 1080
     }
   ],
   "logging_steps": 20,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.8243283374705213e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d72e5e897f7eeedb9a537d5a8eb67237cc9f0dc00099162f6e856b5955d6dfe
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:f84391dd2b6644a409861092551791dc6672e3d530edb0e666c77c87dd747030
 size 5496