Instructions to use lexlms/legal-roberta-large with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use lexlms/legal-roberta-large with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="lexlms/legal-roberta-large")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("lexlms/legal-roberta-large")
model = AutoModelForMaskedLM.from_pretrained("lexlms/legal-roberta-large")
- Notebooks
- Google Colab
- Kaggle
Commit ·
d6ad042
1
Parent(s): e31078a
Training in progress, step 1000000
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +311 -3
- pytorch_model.bin +1 -1
- runs/Nov28_12-08-26_t1v-n-7cb529b4-w-0/events.out.tfevents.1669637379.t1v-n-7cb529b4-w-0.2625564.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2841350745
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e9cfc0cf0e4ebfef38df44339a78495c871dc4fdb1b15bf66baf5551245fcab
|
| 3 |
size 2841350745
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1420697771
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0a529311e7b73b1544a6d34e823d88935fc28967cab28dfe3641c41ef9d9245
|
| 3 |
size 1420697771
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8033b5759c03f0fdee51e437a0c23c3a85f9badbcb2e5f68cd55e6d2383e5577
|
| 3 |
size 13611
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ba26c1d82afce467f99f1dd703268570999f7477616467ac56e429a68fb1d41
|
| 3 |
size 13611
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ba26c1d82afce467f99f1dd703268570999f7477616467ac56e429a68fb1d41
|
| 3 |
size 13611
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8033b5759c03f0fdee51e437a0c23c3a85f9badbcb2e5f68cd55e6d2383e5577
|
| 3 |
size 13611
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:831d0b332f86a08639c5540d922d6f7f898fe0e329f753536e820fa510124211
|
| 3 |
size 13611
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ba26c1d82afce467f99f1dd703268570999f7477616467ac56e429a68fb1d41
|
| 3 |
size 13611
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8033b5759c03f0fdee51e437a0c23c3a85f9badbcb2e5f68cd55e6d2383e5577
|
| 3 |
size 13611
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:831d0b332f86a08639c5540d922d6f7f898fe0e329f753536e820fa510124211
|
| 3 |
size 13611
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:906bc3ed48818cc1785b6a98c1e064532a322520b99cdf458cfd827674d9b7ec
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5858,11 +5858,319 @@
|
|
| 5858 |
"eval_samples_per_second": 254.526,
|
| 5859 |
"eval_steps_per_second": 3.996,
|
| 5860 |
"step": 950000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5861 |
}
|
| 5862 |
],
|
| 5863 |
"max_steps": 1000000,
|
| 5864 |
"num_train_epochs": 9223372036854775807,
|
| 5865 |
-
"total_flos": 2.
|
| 5866 |
"trial_name": null,
|
| 5867 |
"trial_params": null
|
| 5868 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.4659550000000001,
|
| 5 |
+
"global_step": 1000000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5858 |
"eval_samples_per_second": 254.526,
|
| 5859 |
"eval_steps_per_second": 3.996,
|
| 5860 |
"step": 950000
|
| 5861 |
+
},
|
| 5862 |
+
{
|
| 5863 |
+
"epoch": 1.42,
|
| 5864 |
+
"learning_rate": 6.549893279788277e-07,
|
| 5865 |
+
"loss": 0.7946,
|
| 5866 |
+
"step": 951000
|
| 5867 |
+
},
|
| 5868 |
+
{
|
| 5869 |
+
"epoch": 1.42,
|
| 5870 |
+
"learning_rate": 6.285834552247128e-07,
|
| 5871 |
+
"loss": 0.8071,
|
| 5872 |
+
"step": 952000
|
| 5873 |
+
},
|
| 5874 |
+
{
|
| 5875 |
+
"epoch": 1.42,
|
| 5876 |
+
"learning_rate": 6.027175003719354e-07,
|
| 5877 |
+
"loss": 0.8073,
|
| 5878 |
+
"step": 953000
|
| 5879 |
+
},
|
| 5880 |
+
{
|
| 5881 |
+
"epoch": 1.42,
|
| 5882 |
+
"learning_rate": 5.773917462864264e-07,
|
| 5883 |
+
"loss": 0.8005,
|
| 5884 |
+
"step": 954000
|
| 5885 |
+
},
|
| 5886 |
+
{
|
| 5887 |
+
"epoch": 1.42,
|
| 5888 |
+
"learning_rate": 5.526064699265753e-07,
|
| 5889 |
+
"loss": 0.8143,
|
| 5890 |
+
"step": 955000
|
| 5891 |
+
},
|
| 5892 |
+
{
|
| 5893 |
+
"epoch": 1.42,
|
| 5894 |
+
"learning_rate": 5.283619423401998e-07,
|
| 5895 |
+
"loss": 0.8085,
|
| 5896 |
+
"step": 956000
|
| 5897 |
+
},
|
| 5898 |
+
{
|
| 5899 |
+
"epoch": 1.42,
|
| 5900 |
+
"learning_rate": 5.046584286615697e-07,
|
| 5901 |
+
"loss": 0.8065,
|
| 5902 |
+
"step": 957000
|
| 5903 |
+
},
|
| 5904 |
+
{
|
| 5905 |
+
"epoch": 1.42,
|
| 5906 |
+
"learning_rate": 4.814961881085045e-07,
|
| 5907 |
+
"loss": 0.7965,
|
| 5908 |
+
"step": 958000
|
| 5909 |
+
},
|
| 5910 |
+
{
|
| 5911 |
+
"epoch": 1.42,
|
| 5912 |
+
"learning_rate": 4.5887547397955864e-07,
|
| 5913 |
+
"loss": 0.7935,
|
| 5914 |
+
"step": 959000
|
| 5915 |
+
},
|
| 5916 |
+
{
|
| 5917 |
+
"epoch": 1.43,
|
| 5918 |
+
"learning_rate": 4.367965336512403e-07,
|
| 5919 |
+
"loss": 0.8058,
|
| 5920 |
+
"step": 960000
|
| 5921 |
+
},
|
| 5922 |
+
{
|
| 5923 |
+
"epoch": 1.43,
|
| 5924 |
+
"learning_rate": 4.1525960857530243e-07,
|
| 5925 |
+
"loss": 0.8087,
|
| 5926 |
+
"step": 961000
|
| 5927 |
+
},
|
| 5928 |
+
{
|
| 5929 |
+
"epoch": 1.43,
|
| 5930 |
+
"learning_rate": 3.9426493427611177e-07,
|
| 5931 |
+
"loss": 0.8059,
|
| 5932 |
+
"step": 962000
|
| 5933 |
+
},
|
| 5934 |
+
{
|
| 5935 |
+
"epoch": 1.43,
|
| 5936 |
+
"learning_rate": 3.738127403480507e-07,
|
| 5937 |
+
"loss": 0.8112,
|
| 5938 |
+
"step": 963000
|
| 5939 |
+
},
|
| 5940 |
+
{
|
| 5941 |
+
"epoch": 1.43,
|
| 5942 |
+
"learning_rate": 3.5390325045304706e-07,
|
| 5943 |
+
"loss": 0.8136,
|
| 5944 |
+
"step": 964000
|
| 5945 |
+
},
|
| 5946 |
+
{
|
| 5947 |
+
"epoch": 1.43,
|
| 5948 |
+
"learning_rate": 3.3453668231809286e-07,
|
| 5949 |
+
"loss": 0.8115,
|
| 5950 |
+
"step": 965000
|
| 5951 |
+
},
|
| 5952 |
+
{
|
| 5953 |
+
"epoch": 1.43,
|
| 5954 |
+
"learning_rate": 3.157132477328628e-07,
|
| 5955 |
+
"loss": 0.7955,
|
| 5956 |
+
"step": 966000
|
| 5957 |
+
},
|
| 5958 |
+
{
|
| 5959 |
+
"epoch": 1.43,
|
| 5960 |
+
"learning_rate": 2.9743315254743833e-07,
|
| 5961 |
+
"loss": 0.8069,
|
| 5962 |
+
"step": 967000
|
| 5963 |
+
},
|
| 5964 |
+
{
|
| 5965 |
+
"epoch": 1.43,
|
| 5966 |
+
"learning_rate": 2.796965966699927e-07,
|
| 5967 |
+
"loss": 0.8109,
|
| 5968 |
+
"step": 968000
|
| 5969 |
+
},
|
| 5970 |
+
{
|
| 5971 |
+
"epoch": 1.43,
|
| 5972 |
+
"learning_rate": 2.625037740646763e-07,
|
| 5973 |
+
"loss": 0.8081,
|
| 5974 |
+
"step": 969000
|
| 5975 |
+
},
|
| 5976 |
+
{
|
| 5977 |
+
"epoch": 1.44,
|
| 5978 |
+
"learning_rate": 2.458548727494292e-07,
|
| 5979 |
+
"loss": 0.8169,
|
| 5980 |
+
"step": 970000
|
| 5981 |
+
},
|
| 5982 |
+
{
|
| 5983 |
+
"epoch": 1.44,
|
| 5984 |
+
"learning_rate": 2.2975007479397738e-07,
|
| 5985 |
+
"loss": 0.8193,
|
| 5986 |
+
"step": 971000
|
| 5987 |
+
},
|
| 5988 |
+
{
|
| 5989 |
+
"epoch": 1.44,
|
| 5990 |
+
"learning_rate": 2.1418955631781202e-07,
|
| 5991 |
+
"loss": 0.8084,
|
| 5992 |
+
"step": 972000
|
| 5993 |
+
},
|
| 5994 |
+
{
|
| 5995 |
+
"epoch": 1.44,
|
| 5996 |
+
"learning_rate": 1.9917348748826335e-07,
|
| 5997 |
+
"loss": 0.7984,
|
| 5998 |
+
"step": 973000
|
| 5999 |
+
},
|
| 6000 |
+
{
|
| 6001 |
+
"epoch": 1.44,
|
| 6002 |
+
"learning_rate": 1.847020325186577e-07,
|
| 6003 |
+
"loss": 0.7909,
|
| 6004 |
+
"step": 974000
|
| 6005 |
+
},
|
| 6006 |
+
{
|
| 6007 |
+
"epoch": 1.44,
|
| 6008 |
+
"learning_rate": 1.7077534966650766e-07,
|
| 6009 |
+
"loss": 0.7789,
|
| 6010 |
+
"step": 975000
|
| 6011 |
+
},
|
| 6012 |
+
{
|
| 6013 |
+
"epoch": 1.44,
|
| 6014 |
+
"learning_rate": 1.5739359123178587e-07,
|
| 6015 |
+
"loss": 0.8053,
|
| 6016 |
+
"step": 976000
|
| 6017 |
+
},
|
| 6018 |
+
{
|
| 6019 |
+
"epoch": 1.44,
|
| 6020 |
+
"learning_rate": 1.4455690355525964e-07,
|
| 6021 |
+
"loss": 0.8,
|
| 6022 |
+
"step": 977000
|
| 6023 |
+
},
|
| 6024 |
+
{
|
| 6025 |
+
"epoch": 1.44,
|
| 6026 |
+
"learning_rate": 1.3226542701689215e-07,
|
| 6027 |
+
"loss": 0.7943,
|
| 6028 |
+
"step": 978000
|
| 6029 |
+
},
|
| 6030 |
+
{
|
| 6031 |
+
"epoch": 1.44,
|
| 6032 |
+
"learning_rate": 1.2051929603428825e-07,
|
| 6033 |
+
"loss": 0.7907,
|
| 6034 |
+
"step": 979000
|
| 6035 |
+
},
|
| 6036 |
+
{
|
| 6037 |
+
"epoch": 1.45,
|
| 6038 |
+
"learning_rate": 1.0931863906127327e-07,
|
| 6039 |
+
"loss": 0.7951,
|
| 6040 |
+
"step": 980000
|
| 6041 |
+
},
|
| 6042 |
+
{
|
| 6043 |
+
"epoch": 1.45,
|
| 6044 |
+
"learning_rate": 9.866357858642205e-08,
|
| 6045 |
+
"loss": 0.798,
|
| 6046 |
+
"step": 981000
|
| 6047 |
+
},
|
| 6048 |
+
{
|
| 6049 |
+
"epoch": 1.45,
|
| 6050 |
+
"learning_rate": 8.855423113177664e-08,
|
| 6051 |
+
"loss": 0.8057,
|
| 6052 |
+
"step": 982000
|
| 6053 |
+
},
|
| 6054 |
+
{
|
| 6055 |
+
"epoch": 1.45,
|
| 6056 |
+
"learning_rate": 7.899070725153613e-08,
|
| 6057 |
+
"loss": 0.7998,
|
| 6058 |
+
"step": 983000
|
| 6059 |
+
},
|
| 6060 |
+
{
|
| 6061 |
+
"epoch": 1.45,
|
| 6062 |
+
"learning_rate": 6.997311153086883e-08,
|
| 6063 |
+
"loss": 0.7946,
|
| 6064 |
+
"step": 984000
|
| 6065 |
+
},
|
| 6066 |
+
{
|
| 6067 |
+
"epoch": 1.45,
|
| 6068 |
+
"learning_rate": 6.150154258476315e-08,
|
| 6069 |
+
"loss": 0.8003,
|
| 6070 |
+
"step": 985000
|
| 6071 |
+
},
|
| 6072 |
+
{
|
| 6073 |
+
"epoch": 1.45,
|
| 6074 |
+
"learning_rate": 5.3576093056922906e-08,
|
| 6075 |
+
"loss": 0.8032,
|
| 6076 |
+
"step": 986000
|
| 6077 |
+
},
|
| 6078 |
+
{
|
| 6079 |
+
"epoch": 1.45,
|
| 6080 |
+
"learning_rate": 4.619684961881254e-08,
|
| 6081 |
+
"loss": 0.7832,
|
| 6082 |
+
"step": 987000
|
| 6083 |
+
},
|
| 6084 |
+
{
|
| 6085 |
+
"epoch": 1.45,
|
| 6086 |
+
"learning_rate": 3.936389296864129e-08,
|
| 6087 |
+
"loss": 0.7957,
|
| 6088 |
+
"step": 988000
|
| 6089 |
+
},
|
| 6090 |
+
{
|
| 6091 |
+
"epoch": 1.45,
|
| 6092 |
+
"learning_rate": 3.3077297830541584e-08,
|
| 6093 |
+
"loss": 0.8173,
|
| 6094 |
+
"step": 989000
|
| 6095 |
+
},
|
| 6096 |
+
{
|
| 6097 |
+
"epoch": 1.46,
|
| 6098 |
+
"learning_rate": 2.7337132953697554e-08,
|
| 6099 |
+
"loss": 0.804,
|
| 6100 |
+
"step": 990000
|
| 6101 |
+
},
|
| 6102 |
+
{
|
| 6103 |
+
"epoch": 1.46,
|
| 6104 |
+
"learning_rate": 2.214346111164556e-08,
|
| 6105 |
+
"loss": 0.7921,
|
| 6106 |
+
"step": 991000
|
| 6107 |
+
},
|
| 6108 |
+
{
|
| 6109 |
+
"epoch": 1.46,
|
| 6110 |
+
"learning_rate": 1.749633910153592e-08,
|
| 6111 |
+
"loss": 0.8102,
|
| 6112 |
+
"step": 992000
|
| 6113 |
+
},
|
| 6114 |
+
{
|
| 6115 |
+
"epoch": 1.46,
|
| 6116 |
+
"learning_rate": 1.3395817743561134e-08,
|
| 6117 |
+
"loss": 0.8074,
|
| 6118 |
+
"step": 993000
|
| 6119 |
+
},
|
| 6120 |
+
{
|
| 6121 |
+
"epoch": 1.46,
|
| 6122 |
+
"learning_rate": 9.841941880361916e-09,
|
| 6123 |
+
"loss": 0.7963,
|
| 6124 |
+
"step": 994000
|
| 6125 |
+
},
|
| 6126 |
+
{
|
| 6127 |
+
"epoch": 1.46,
|
| 6128 |
+
"learning_rate": 6.834750376549792e-09,
|
| 6129 |
+
"loss": 0.8013,
|
| 6130 |
+
"step": 995000
|
| 6131 |
+
},
|
| 6132 |
+
{
|
| 6133 |
+
"epoch": 1.46,
|
| 6134 |
+
"learning_rate": 4.3742761183018784e-09,
|
| 6135 |
+
"loss": 0.8027,
|
| 6136 |
+
"step": 996000
|
| 6137 |
+
},
|
| 6138 |
+
{
|
| 6139 |
+
"epoch": 1.46,
|
| 6140 |
+
"learning_rate": 2.4605460129556445e-09,
|
| 6141 |
+
"loss": 0.8073,
|
| 6142 |
+
"step": 997000
|
| 6143 |
+
},
|
| 6144 |
+
{
|
| 6145 |
+
"epoch": 1.46,
|
| 6146 |
+
"learning_rate": 1.0935809887702154e-09,
|
| 6147 |
+
"loss": 0.8076,
|
| 6148 |
+
"step": 998000
|
| 6149 |
+
},
|
| 6150 |
+
{
|
| 6151 |
+
"epoch": 1.46,
|
| 6152 |
+
"learning_rate": 2.7339599464326627e-10,
|
| 6153 |
+
"loss": 0.8206,
|
| 6154 |
+
"step": 999000
|
| 6155 |
+
},
|
| 6156 |
+
{
|
| 6157 |
+
"epoch": 1.47,
|
| 6158 |
+
"learning_rate": 0.0,
|
| 6159 |
+
"loss": 0.8031,
|
| 6160 |
+
"step": 1000000
|
| 6161 |
+
},
|
| 6162 |
+
{
|
| 6163 |
+
"epoch": 1.47,
|
| 6164 |
+
"eval_loss": 0.6313675045967102,
|
| 6165 |
+
"eval_runtime": 39.6188,
|
| 6166 |
+
"eval_samples_per_second": 252.405,
|
| 6167 |
+
"eval_steps_per_second": 3.963,
|
| 6168 |
+
"step": 1000000
|
| 6169 |
}
|
| 6170 |
],
|
| 6171 |
"max_steps": 1000000,
|
| 6172 |
"num_train_epochs": 9223372036854775807,
|
| 6173 |
+
"total_flos": 2.9826726086567657e+19,
|
| 6174 |
"trial_name": null,
|
| 6175 |
"trial_params": null
|
| 6176 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1420697771
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0a529311e7b73b1544a6d34e823d88935fc28967cab28dfe3641c41ef9d9245
|
| 3 |
size 1420697771
|
runs/Nov28_12-08-26_t1v-n-7cb529b4-w-0/events.out.tfevents.1669637379.t1v-n-7cb529b4-w-0.2625564.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5f35d161162d9608ce252e1359e6edf6ab56d695e1627810f9dd07c69d886f5
|
| 3 |
+
size 78218
|