Instructions to use lexlms/legal-roberta-large with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use lexlms/legal-roberta-large with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="lexlms/legal-roberta-large")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("lexlms/legal-roberta-large")
model = AutoModelForMaskedLM.from_pretrained("lexlms/legal-roberta-large")
```
- Notebooks
- Google Colab
- Kaggle
Commit ·
cfec863
1
Parent(s): 0e371ee
Training in progress, step 550000
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +311 -3
- pytorch_model.bin +1 -1
- runs/Nov16_15-57-21_t1v-n-7cb529b4-w-0/events.out.tfevents.1668614313.t1v-n-7cb529b4-w-0.98881.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2841350745
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10c1cc07ef015417fadf62732c34ef9e27cbb3cd66409b4e91a7c9b7ed63de45
|
| 3 |
size 2841350745
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1420697771
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73d32e670c1a25deab1c7c4e62d918d91ced49b69c4bf7f2580c58cd7bb52ed3
|
| 3 |
size 1420697771
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1500d1fe2b65abcd5681b96da84daebed56ca0b2a7ec9f842129b4aced1c41e
|
| 3 |
size 13611
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f512681b5759af541156fd5f082e96f6aa34fd2c86fde1147ba2a2e83124f874
|
| 3 |
size 13611
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8568a3c86e0e22a7c03d27e124df5c47e2f09dfc131671de0230cc4216fda8f
|
| 3 |
size 13611
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1500d1fe2b65abcd5681b96da84daebed56ca0b2a7ec9f842129b4aced1c41e
|
| 3 |
size 13611
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1500d1fe2b65abcd5681b96da84daebed56ca0b2a7ec9f842129b4aced1c41e
|
| 3 |
size 13611
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8568a3c86e0e22a7c03d27e124df5c47e2f09dfc131671de0230cc4216fda8f
|
| 3 |
size 13611
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f512681b5759af541156fd5f082e96f6aa34fd2c86fde1147ba2a2e83124f874
|
| 3 |
size 13611
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f512681b5759af541156fd5f082e96f6aa34fd2c86fde1147ba2a2e83124f874
|
| 3 |
size 13611
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b390312ddc1614538c7fd82ca2c4639dfed127a83cb04c40dedde6f67b4e460
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -3086,11 +3086,319 @@
|
|
| 3086 |
"eval_samples_per_second": 261.931,
|
| 3087 |
"eval_steps_per_second": 4.112,
|
| 3088 |
"step": 500000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3089 |
}
|
| 3090 |
],
|
| 3091 |
"max_steps": 1000000,
|
| 3092 |
"num_train_epochs": 9223372036854775807,
|
| 3093 |
-
"total_flos": 1.
|
| 3094 |
"trial_name": null,
|
| 3095 |
"trial_params": null
|
| 3096 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.015997,
|
| 5 |
+
"global_step": 550000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 3086 |
"eval_samples_per_second": 261.931,
|
| 3087 |
"eval_steps_per_second": 4.112,
|
| 3088 |
"step": 500000
|
| 3089 |
+
},
|
| 3090 |
+
{
|
| 3091 |
+
"epoch": 0.5,
|
| 3092 |
+
"learning_rate": 5.396416275909779e-05,
|
| 3093 |
+
"loss": 0.905,
|
| 3094 |
+
"step": 501000
|
| 3095 |
+
},
|
| 3096 |
+
{
|
| 3097 |
+
"epoch": 0.5,
|
| 3098 |
+
"learning_rate": 5.379931489313016e-05,
|
| 3099 |
+
"loss": 0.8947,
|
| 3100 |
+
"step": 502000
|
| 3101 |
+
},
|
| 3102 |
+
{
|
| 3103 |
+
"epoch": 0.5,
|
| 3104 |
+
"learning_rate": 5.363442547846356e-05,
|
| 3105 |
+
"loss": 0.8928,
|
| 3106 |
+
"step": 503000
|
| 3107 |
+
},
|
| 3108 |
+
{
|
| 3109 |
+
"epoch": 0.5,
|
| 3110 |
+
"learning_rate": 5.3469496318302204e-05,
|
| 3111 |
+
"loss": 0.8862,
|
| 3112 |
+
"step": 504000
|
| 3113 |
+
},
|
| 3114 |
+
{
|
| 3115 |
+
"epoch": 0.51,
|
| 3116 |
+
"learning_rate": 5.330452921628497e-05,
|
| 3117 |
+
"loss": 0.8856,
|
| 3118 |
+
"step": 505000
|
| 3119 |
+
},
|
| 3120 |
+
{
|
| 3121 |
+
"epoch": 0.51,
|
| 3122 |
+
"learning_rate": 5.313952597646568e-05,
|
| 3123 |
+
"loss": 0.8765,
|
| 3124 |
+
"step": 506000
|
| 3125 |
+
},
|
| 3126 |
+
{
|
| 3127 |
+
"epoch": 0.51,
|
| 3128 |
+
"learning_rate": 5.297448840329329e-05,
|
| 3129 |
+
"loss": 0.895,
|
| 3130 |
+
"step": 507000
|
| 3131 |
+
},
|
| 3132 |
+
{
|
| 3133 |
+
"epoch": 0.51,
|
| 3134 |
+
"learning_rate": 5.280941830159227e-05,
|
| 3135 |
+
"loss": 0.8952,
|
| 3136 |
+
"step": 508000
|
| 3137 |
+
},
|
| 3138 |
+
{
|
| 3139 |
+
"epoch": 0.51,
|
| 3140 |
+
"learning_rate": 5.264431747654284e-05,
|
| 3141 |
+
"loss": 0.8737,
|
| 3142 |
+
"step": 509000
|
| 3143 |
+
},
|
| 3144 |
+
{
|
| 3145 |
+
"epoch": 0.51,
|
| 3146 |
+
"learning_rate": 5.247918773366112e-05,
|
| 3147 |
+
"loss": 0.8797,
|
| 3148 |
+
"step": 510000
|
| 3149 |
+
},
|
| 3150 |
+
{
|
| 3151 |
+
"epoch": 0.51,
|
| 3152 |
+
"learning_rate": 5.231403087877955e-05,
|
| 3153 |
+
"loss": 0.8925,
|
| 3154 |
+
"step": 511000
|
| 3155 |
+
},
|
| 3156 |
+
{
|
| 3157 |
+
"epoch": 0.51,
|
| 3158 |
+
"learning_rate": 5.214884871802703e-05,
|
| 3159 |
+
"loss": 0.8906,
|
| 3160 |
+
"step": 512000
|
| 3161 |
+
},
|
| 3162 |
+
{
|
| 3163 |
+
"epoch": 0.51,
|
| 3164 |
+
"learning_rate": 5.198364305780922e-05,
|
| 3165 |
+
"loss": 0.8923,
|
| 3166 |
+
"step": 513000
|
| 3167 |
+
},
|
| 3168 |
+
{
|
| 3169 |
+
"epoch": 0.51,
|
| 3170 |
+
"learning_rate": 5.1818415704788725e-05,
|
| 3171 |
+
"loss": 0.8959,
|
| 3172 |
+
"step": 514000
|
| 3173 |
+
},
|
| 3174 |
+
{
|
| 3175 |
+
"epoch": 0.52,
|
| 3176 |
+
"learning_rate": 5.165316846586541e-05,
|
| 3177 |
+
"loss": 0.8917,
|
| 3178 |
+
"step": 515000
|
| 3179 |
+
},
|
| 3180 |
+
{
|
| 3181 |
+
"epoch": 0.52,
|
| 3182 |
+
"learning_rate": 5.148790314815663e-05,
|
| 3183 |
+
"loss": 0.8944,
|
| 3184 |
+
"step": 516000
|
| 3185 |
+
},
|
| 3186 |
+
{
|
| 3187 |
+
"epoch": 0.52,
|
| 3188 |
+
"learning_rate": 5.132262155897739e-05,
|
| 3189 |
+
"loss": 0.896,
|
| 3190 |
+
"step": 517000
|
| 3191 |
+
},
|
| 3192 |
+
{
|
| 3193 |
+
"epoch": 0.52,
|
| 3194 |
+
"learning_rate": 5.1157325505820694e-05,
|
| 3195 |
+
"loss": 0.8967,
|
| 3196 |
+
"step": 518000
|
| 3197 |
+
},
|
| 3198 |
+
{
|
| 3199 |
+
"epoch": 0.52,
|
| 3200 |
+
"learning_rate": 5.0992016796337686e-05,
|
| 3201 |
+
"loss": 0.8809,
|
| 3202 |
+
"step": 519000
|
| 3203 |
+
},
|
| 3204 |
+
{
|
| 3205 |
+
"epoch": 0.52,
|
| 3206 |
+
"learning_rate": 5.0826697238317935e-05,
|
| 3207 |
+
"loss": 0.875,
|
| 3208 |
+
"step": 520000
|
| 3209 |
+
},
|
| 3210 |
+
{
|
| 3211 |
+
"epoch": 0.52,
|
| 3212 |
+
"learning_rate": 5.066136863966963e-05,
|
| 3213 |
+
"loss": 0.8861,
|
| 3214 |
+
"step": 521000
|
| 3215 |
+
},
|
| 3216 |
+
{
|
| 3217 |
+
"epoch": 0.52,
|
| 3218 |
+
"learning_rate": 5.0496032808399815e-05,
|
| 3219 |
+
"loss": 0.8731,
|
| 3220 |
+
"step": 522000
|
| 3221 |
+
},
|
| 3222 |
+
{
|
| 3223 |
+
"epoch": 0.52,
|
| 3224 |
+
"learning_rate": 5.033069155259471e-05,
|
| 3225 |
+
"loss": 0.8644,
|
| 3226 |
+
"step": 523000
|
| 3227 |
+
},
|
| 3228 |
+
{
|
| 3229 |
+
"epoch": 0.52,
|
| 3230 |
+
"learning_rate": 5.016534668039976e-05,
|
| 3231 |
+
"loss": 0.8874,
|
| 3232 |
+
"step": 524000
|
| 3233 |
+
},
|
| 3234 |
+
{
|
| 3235 |
+
"epoch": 0.53,
|
| 3236 |
+
"learning_rate": 5e-05,
|
| 3237 |
+
"loss": 0.8767,
|
| 3238 |
+
"step": 525000
|
| 3239 |
+
},
|
| 3240 |
+
{
|
| 3241 |
+
"epoch": 0.53,
|
| 3242 |
+
"learning_rate": 4.9834653319600246e-05,
|
| 3243 |
+
"loss": 0.8784,
|
| 3244 |
+
"step": 526000
|
| 3245 |
+
},
|
| 3246 |
+
{
|
| 3247 |
+
"epoch": 0.53,
|
| 3248 |
+
"learning_rate": 4.96693084474053e-05,
|
| 3249 |
+
"loss": 0.8803,
|
| 3250 |
+
"step": 527000
|
| 3251 |
+
},
|
| 3252 |
+
{
|
| 3253 |
+
"epoch": 0.53,
|
| 3254 |
+
"learning_rate": 4.950396719160018e-05,
|
| 3255 |
+
"loss": 0.8743,
|
| 3256 |
+
"step": 528000
|
| 3257 |
+
},
|
| 3258 |
+
{
|
| 3259 |
+
"epoch": 0.53,
|
| 3260 |
+
"learning_rate": 4.93386313603304e-05,
|
| 3261 |
+
"loss": 0.8752,
|
| 3262 |
+
"step": 529000
|
| 3263 |
+
},
|
| 3264 |
+
{
|
| 3265 |
+
"epoch": 0.53,
|
| 3266 |
+
"learning_rate": 4.917330276168208e-05,
|
| 3267 |
+
"loss": 0.8542,
|
| 3268 |
+
"step": 530000
|
| 3269 |
+
},
|
| 3270 |
+
{
|
| 3271 |
+
"epoch": 0.53,
|
| 3272 |
+
"learning_rate": 4.9007983203662326e-05,
|
| 3273 |
+
"loss": 0.8749,
|
| 3274 |
+
"step": 531000
|
| 3275 |
+
},
|
| 3276 |
+
{
|
| 3277 |
+
"epoch": 0.53,
|
| 3278 |
+
"learning_rate": 4.884267449417931e-05,
|
| 3279 |
+
"loss": 0.8713,
|
| 3280 |
+
"step": 532000
|
| 3281 |
+
},
|
| 3282 |
+
{
|
| 3283 |
+
"epoch": 0.53,
|
| 3284 |
+
"learning_rate": 4.867737844102261e-05,
|
| 3285 |
+
"loss": 0.8678,
|
| 3286 |
+
"step": 533000
|
| 3287 |
+
},
|
| 3288 |
+
{
|
| 3289 |
+
"epoch": 0.53,
|
| 3290 |
+
"learning_rate": 4.851209685184338e-05,
|
| 3291 |
+
"loss": 0.8818,
|
| 3292 |
+
"step": 534000
|
| 3293 |
+
},
|
| 3294 |
+
{
|
| 3295 |
+
"epoch": 1.0,
|
| 3296 |
+
"learning_rate": 4.834683153413459e-05,
|
| 3297 |
+
"loss": 0.8782,
|
| 3298 |
+
"step": 535000
|
| 3299 |
+
},
|
| 3300 |
+
{
|
| 3301 |
+
"epoch": 1.0,
|
| 3302 |
+
"learning_rate": 4.818158429521129e-05,
|
| 3303 |
+
"loss": 0.8431,
|
| 3304 |
+
"step": 536000
|
| 3305 |
+
},
|
| 3306 |
+
{
|
| 3307 |
+
"epoch": 1.0,
|
| 3308 |
+
"learning_rate": 4.801635694219079e-05,
|
| 3309 |
+
"loss": 0.8605,
|
| 3310 |
+
"step": 537000
|
| 3311 |
+
},
|
| 3312 |
+
{
|
| 3313 |
+
"epoch": 1.0,
|
| 3314 |
+
"learning_rate": 4.785115128197298e-05,
|
| 3315 |
+
"loss": 0.8734,
|
| 3316 |
+
"step": 538000
|
| 3317 |
+
},
|
| 3318 |
+
{
|
| 3319 |
+
"epoch": 1.0,
|
| 3320 |
+
"learning_rate": 4.7685969121220456e-05,
|
| 3321 |
+
"loss": 0.8778,
|
| 3322 |
+
"step": 539000
|
| 3323 |
+
},
|
| 3324 |
+
{
|
| 3325 |
+
"epoch": 1.01,
|
| 3326 |
+
"learning_rate": 4.7520812266338885e-05,
|
| 3327 |
+
"loss": 0.8858,
|
| 3328 |
+
"step": 540000
|
| 3329 |
+
},
|
| 3330 |
+
{
|
| 3331 |
+
"epoch": 1.01,
|
| 3332 |
+
"learning_rate": 4.735568252345718e-05,
|
| 3333 |
+
"loss": 0.8837,
|
| 3334 |
+
"step": 541000
|
| 3335 |
+
},
|
| 3336 |
+
{
|
| 3337 |
+
"epoch": 1.01,
|
| 3338 |
+
"learning_rate": 4.7190581698407725e-05,
|
| 3339 |
+
"loss": 0.8863,
|
| 3340 |
+
"step": 542000
|
| 3341 |
+
},
|
| 3342 |
+
{
|
| 3343 |
+
"epoch": 1.01,
|
| 3344 |
+
"learning_rate": 4.702551159670672e-05,
|
| 3345 |
+
"loss": 0.89,
|
| 3346 |
+
"step": 543000
|
| 3347 |
+
},
|
| 3348 |
+
{
|
| 3349 |
+
"epoch": 1.01,
|
| 3350 |
+
"learning_rate": 4.6860474023534335e-05,
|
| 3351 |
+
"loss": 0.8921,
|
| 3352 |
+
"step": 544000
|
| 3353 |
+
},
|
| 3354 |
+
{
|
| 3355 |
+
"epoch": 1.01,
|
| 3356 |
+
"learning_rate": 4.669547078371504e-05,
|
| 3357 |
+
"loss": 0.8971,
|
| 3358 |
+
"step": 545000
|
| 3359 |
+
},
|
| 3360 |
+
{
|
| 3361 |
+
"epoch": 1.01,
|
| 3362 |
+
"learning_rate": 4.65305036816978e-05,
|
| 3363 |
+
"loss": 0.8975,
|
| 3364 |
+
"step": 546000
|
| 3365 |
+
},
|
| 3366 |
+
{
|
| 3367 |
+
"epoch": 1.01,
|
| 3368 |
+
"learning_rate": 4.6365574521536445e-05,
|
| 3369 |
+
"loss": 0.8909,
|
| 3370 |
+
"step": 547000
|
| 3371 |
+
},
|
| 3372 |
+
{
|
| 3373 |
+
"epoch": 1.01,
|
| 3374 |
+
"learning_rate": 4.620068510686985e-05,
|
| 3375 |
+
"loss": 0.9122,
|
| 3376 |
+
"step": 548000
|
| 3377 |
+
},
|
| 3378 |
+
{
|
| 3379 |
+
"epoch": 1.01,
|
| 3380 |
+
"learning_rate": 4.60358372409022e-05,
|
| 3381 |
+
"loss": 0.9077,
|
| 3382 |
+
"step": 549000
|
| 3383 |
+
},
|
| 3384 |
+
{
|
| 3385 |
+
"epoch": 1.02,
|
| 3386 |
+
"learning_rate": 4.5871032726383386e-05,
|
| 3387 |
+
"loss": 0.8997,
|
| 3388 |
+
"step": 550000
|
| 3389 |
+
},
|
| 3390 |
+
{
|
| 3391 |
+
"epoch": 1.02,
|
| 3392 |
+
"eval_loss": 0.6843340396881104,
|
| 3393 |
+
"eval_runtime": 38.2086,
|
| 3394 |
+
"eval_samples_per_second": 261.721,
|
| 3395 |
+
"eval_steps_per_second": 4.109,
|
| 3396 |
+
"step": 550000
|
| 3397 |
}
|
| 3398 |
],
|
| 3399 |
"max_steps": 1000000,
|
| 3400 |
"num_train_epochs": 9223372036854775807,
|
| 3401 |
+
"total_flos": 1.6404717616477372e+19,
|
| 3402 |
"trial_name": null,
|
| 3403 |
"trial_params": null
|
| 3404 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1420697771
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73d32e670c1a25deab1c7c4e62d918d91ced49b69c4bf7f2580c58cd7bb52ed3
|
| 3 |
size 1420697771
|
runs/Nov16_15-57-21_t1v-n-7cb529b4-w-0/events.out.tfevents.1668614313.t1v-n-7cb529b4-w-0.98881.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ecd2224cb16bcd63e7e63290c4482ec06797281c14cfc3a7f61f066187470ca
|
| 3 |
+
size 78218
|