Training in progress, step 195, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c58a025c9db4ca179a49fc9f20b9777af3de0aacd05cfcbb8608d68bb4a1da11
 size 22573704

 version https://git-lfs.github.com/spec/v1
+oid sha256:9cedff16679b6d4e19a18ba7da00ee671d21d538f1705b2ca1e7ea02fe1354a2
 size 22573704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a697bb97ed2c88f376073e75285fd16cb0b989b2210315155baccbb6e1a2054
 size 11710970

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c67142372a3ce53f9073e0d7fa901d3cc1f0dcbddb925db1125901631745613
 size 11710970

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c6b8abea532445f7a2b8197f8311b5a4d582f8239dfa11c211ca7d24d646650
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:333f2a71b3e34575844f8fd8a45254433fb8a5c1731e62ba395b530a464ea2d4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b289b438571df9d34409287e67864402aaad98d9ecdf87ccd44b9abb7f5b6982
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a35430a05f2b9748f37dd11667a782564c85a35d840d60cbaddfa2c905ab7c0a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.33421284080914687,
   "eval_steps": 34,
-  "global_step": 190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1385,6 +1385,41 @@
       "learning_rate": 1.7026900316098215e-06,
       "loss": 0.9687,
       "step": 190
     }
   ],
   "logging_steps": 1,
@@ -1404,7 +1439,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0968875503976448e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.34300791556728233,
   "eval_steps": 34,
+  "global_step": 195,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.7026900316098215e-06,
       "loss": 0.9687,
       "step": 190
+    },
+    {
+      "epoch": 0.335971855760774,
+      "grad_norm": 0.8564959168434143,
+      "learning_rate": 1.3799252646597426e-06,
+      "loss": 0.933,
+      "step": 191
+    },
+    {
+      "epoch": 0.33773087071240104,
+      "grad_norm": 1.036962866783142,
+      "learning_rate": 1.0908391628854041e-06,
+      "loss": 0.9713,
+      "step": 192
+    },
+    {
+      "epoch": 0.33948988566402816,
+      "grad_norm": 0.919511616230011,
+      "learning_rate": 8.355304489257254e-07,
+      "loss": 1.0534,
+      "step": 193
+    },
+    {
+      "epoch": 0.3412489006156552,
+      "grad_norm": 0.9325530529022217,
+      "learning_rate": 6.140863104726391e-07,
+      "loss": 0.9465,
+      "step": 194
+    },
+    {
+      "epoch": 0.34300791556728233,
+      "grad_norm": 0.9085223078727722,
+      "learning_rate": 4.2658237049655323e-07,
+      "loss": 1.0461,
+      "step": 195
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.1257530122502144e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null