besimray commited on
Commit
ed93eb9
·
verified ·
1 Parent(s): 9be00cd

Training in progress, step 240, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45af38183fac4bdd87c06ee94f18b22a7da1068712890ae4063748b65e92320a
3
  size 22573704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea176b2890f5c039b27382d7c55bd1ece54713333c0ee47f4372f34e426564c
3
  size 22573704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc8aaca7431e39887e25a3b439419774272ece5fa8db24c537c5201d3f93250b
3
  size 11710970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f80fdd9f8732871c70824003344dbea9f535560e1447468a53254338e40aee6
3
  size 11710970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4145db56e94116de16125ac48b142c9c5702f370950afc0d1127ba20ea13b21f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a329987e98f6034737aac0f1b5e07f42a7e6f741cd8b93f9ba9996e6dd5f9b04
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21bd9360c166d9c3550bf3e56da2dc8de427467685d616ad1024770199c37aea
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a53d552dcaadb831ec7d361f91d388f352dfbf94dc56ae13700304776b06cd3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.161059856414795,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-225",
4
- "epoch": 1.236842105263158,
5
  "eval_steps": 5,
6
- "global_step": 235,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2036,6 +2036,49 @@
2036
  "eval_samples_per_second": 4.436,
2037
  "eval_steps_per_second": 0.444,
2038
  "step": 235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2039
  }
2040
  ],
2041
  "logging_steps": 1,
@@ -2050,7 +2093,7 @@
2050
  "early_stopping_threshold": 0.0
2051
  },
2052
  "attributes": {
2053
- "early_stopping_patience_counter": 2
2054
  }
2055
  },
2056
  "TrainerControl": {
@@ -2059,12 +2102,12 @@
2059
  "should_evaluate": false,
2060
  "should_log": false,
2061
  "should_save": true,
2062
- "should_training_stop": false
2063
  },
2064
  "attributes": {}
2065
  }
2066
  },
2067
- "total_flos": 5.65281961279488e+16,
2068
  "train_batch_size": 10,
2069
  "trial_name": null,
2070
  "trial_params": null
 
1
  {
2
  "best_metric": 1.161059856414795,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-225",
4
+ "epoch": 1.263157894736842,
5
  "eval_steps": 5,
6
+ "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2036
  "eval_samples_per_second": 4.436,
2037
  "eval_steps_per_second": 0.444,
2038
  "step": 235
2039
+ },
2040
+ {
2041
+ "epoch": 1.2421052631578948,
2042
+ "grad_norm": 1.3955504894256592,
2043
+ "learning_rate": 1.7280528409146097e-05,
2044
+ "loss": 1.6323,
2045
+ "step": 236
2046
+ },
2047
+ {
2048
+ "epoch": 1.2473684210526317,
2049
+ "grad_norm": 0.9354751110076904,
2050
+ "learning_rate": 1.7257576857089397e-05,
2051
+ "loss": 1.2273,
2052
+ "step": 237
2053
+ },
2054
+ {
2055
+ "epoch": 1.2526315789473683,
2056
+ "grad_norm": 0.7192204594612122,
2057
+ "learning_rate": 1.7234544239679807e-05,
2058
+ "loss": 1.2498,
2059
+ "step": 238
2060
+ },
2061
+ {
2062
+ "epoch": 1.2578947368421054,
2063
+ "grad_norm": 0.706244170665741,
2064
+ "learning_rate": 1.721143081418601e-05,
2065
+ "loss": 1.0584,
2066
+ "step": 239
2067
+ },
2068
+ {
2069
+ "epoch": 1.263157894736842,
2070
+ "grad_norm": 0.7391364574432373,
2071
+ "learning_rate": 1.7188236838779297e-05,
2072
+ "loss": 1.1814,
2073
+ "step": 240
2074
+ },
2075
+ {
2076
+ "epoch": 1.263157894736842,
2077
+ "eval_loss": 1.162298321723938,
2078
+ "eval_runtime": 22.5363,
2079
+ "eval_samples_per_second": 4.437,
2080
+ "eval_steps_per_second": 0.444,
2081
+ "step": 240
2082
  }
2083
  ],
2084
  "logging_steps": 1,
 
2093
  "early_stopping_threshold": 0.0
2094
  },
2095
  "attributes": {
2096
+ "early_stopping_patience_counter": 3
2097
  }
2098
  },
2099
  "TrainerControl": {
 
2102
  "should_evaluate": false,
2103
  "should_log": false,
2104
  "should_save": true,
2105
+ "should_training_stop": true
2106
  },
2107
  "attributes": {}
2108
  }
2109
  },
2110
+ "total_flos": 5.77309237051392e+16,
2111
  "train_batch_size": 10,
2112
  "trial_name": null,
2113
  "trial_params": null