Instructions to use lexlms/legal-roberta-base with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use lexlms/legal-roberta-base with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="lexlms/legal-roberta-base")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("lexlms/legal-roberta-base")
model = AutoModelForMaskedLM.from_pretrained("lexlms/legal-roberta-base")
```
- Notebooks
- Google Colab
- Kaggle
Commit ·
f78ff50
1
Parent(s): 6b769b2
Training in progress, step 300000
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +311 -3
- pytorch_model.bin +1 -1
- runs/Nov11_11-14-20_t1v-n-088af867-w-0/events.out.tfevents.1668165319.t1v-n-088af867-w-0.346597.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 996067161
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07a6b46aa8152383b0a50bd07b07a176c831d5337ddec91d8b5b5aba89b5b543
|
| 3 |
size 996067161
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498046827
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2517996d5c4c4163884506060e457650dff4618ff3814b55dc92b5b3c209528
|
| 3 |
size 498046827
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b06f062cec2bd581a5223b4bd92c5f2dcf09d9cd3cfe1d2d70466080b2e9546a
|
| 3 |
size 13611
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d998162d48f4ee450ef4bf13d9e047fd62295bbe3f16a4efd05adea28896f336
|
| 3 |
size 13611
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b06f062cec2bd581a5223b4bd92c5f2dcf09d9cd3cfe1d2d70466080b2e9546a
|
| 3 |
size 13611
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:372875e5805e9be2c35e094a3a6b2332849035b4c4b7dbcec07d8d9b728521d7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b06f062cec2bd581a5223b4bd92c5f2dcf09d9cd3cfe1d2d70466080b2e9546a
|
| 3 |
size 13611
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:372875e5805e9be2c35e094a3a6b2332849035b4c4b7dbcec07d8d9b728521d7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b06f062cec2bd581a5223b4bd92c5f2dcf09d9cd3cfe1d2d70466080b2e9546a
|
| 3 |
size 13611
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:305632a8833b49b3167aa1319ddc125408cf66a7d459afebacacaa7cdcedb877
|
| 3 |
size 13611
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d41c6734c2aef1f60ed0fbc886cbc351448520889799ebfa66c14f8f9e99059
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1546,11 +1546,319 @@
|
|
| 1546 |
"eval_samples_per_second": 487.732,
|
| 1547 |
"eval_steps_per_second": 1.951,
|
| 1548 |
"step": 250000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1549 |
}
|
| 1550 |
],
|
| 1551 |
"max_steps": 1000000,
|
| 1552 |
"num_train_epochs": 9223372036854775807,
|
| 1553 |
-
"total_flos":
|
| 1554 |
"trial_name": null,
|
| 1555 |
"trial_params": null
|
| 1556 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.032937,
|
| 5 |
+
"global_step": 300000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1546 |
"eval_samples_per_second": 487.732,
|
| 1547 |
"eval_steps_per_second": 1.951,
|
| 1548 |
"step": 250000
|
| 1549 |
+
},
|
| 1550 |
+
{
|
| 1551 |
+
"epoch": 0.25,
|
| 1552 |
+
"learning_rate": 8.935525168886262e-05,
|
| 1553 |
+
"loss": 0.907,
|
| 1554 |
+
"step": 251000
|
| 1555 |
+
},
|
| 1556 |
+
{
|
| 1557 |
+
"epoch": 0.25,
|
| 1558 |
+
"learning_rate": 8.92530475251784e-05,
|
| 1559 |
+
"loss": 0.8985,
|
| 1560 |
+
"step": 252000
|
| 1561 |
+
},
|
| 1562 |
+
{
|
| 1563 |
+
"epoch": 0.25,
|
| 1564 |
+
"learning_rate": 8.91504140964553e-05,
|
| 1565 |
+
"loss": 0.8905,
|
| 1566 |
+
"step": 253000
|
| 1567 |
+
},
|
| 1568 |
+
{
|
| 1569 |
+
"epoch": 0.25,
|
| 1570 |
+
"learning_rate": 8.90473525250761e-05,
|
| 1571 |
+
"loss": 0.9049,
|
| 1572 |
+
"step": 254000
|
| 1573 |
+
},
|
| 1574 |
+
{
|
| 1575 |
+
"epoch": 0.26,
|
| 1576 |
+
"learning_rate": 8.894386393810563e-05,
|
| 1577 |
+
"loss": 0.8869,
|
| 1578 |
+
"step": 255000
|
| 1579 |
+
},
|
| 1580 |
+
{
|
| 1581 |
+
"epoch": 0.26,
|
| 1582 |
+
"learning_rate": 8.883994946727849e-05,
|
| 1583 |
+
"loss": 0.9039,
|
| 1584 |
+
"step": 256000
|
| 1585 |
+
},
|
| 1586 |
+
{
|
| 1587 |
+
"epoch": 0.26,
|
| 1588 |
+
"learning_rate": 8.873561024898668e-05,
|
| 1589 |
+
"loss": 0.9061,
|
| 1590 |
+
"step": 257000
|
| 1591 |
+
},
|
| 1592 |
+
{
|
| 1593 |
+
"epoch": 0.26,
|
| 1594 |
+
"learning_rate": 8.863084742426719e-05,
|
| 1595 |
+
"loss": 0.906,
|
| 1596 |
+
"step": 258000
|
| 1597 |
+
},
|
| 1598 |
+
{
|
| 1599 |
+
"epoch": 0.26,
|
| 1600 |
+
"learning_rate": 8.852566213878947e-05,
|
| 1601 |
+
"loss": 0.9074,
|
| 1602 |
+
"step": 259000
|
| 1603 |
+
},
|
| 1604 |
+
{
|
| 1605 |
+
"epoch": 0.26,
|
| 1606 |
+
"learning_rate": 8.842005554284296e-05,
|
| 1607 |
+
"loss": 0.8902,
|
| 1608 |
+
"step": 260000
|
| 1609 |
+
},
|
| 1610 |
+
{
|
| 1611 |
+
"epoch": 0.26,
|
| 1612 |
+
"learning_rate": 8.831402879132446e-05,
|
| 1613 |
+
"loss": 0.8941,
|
| 1614 |
+
"step": 261000
|
| 1615 |
+
},
|
| 1616 |
+
{
|
| 1617 |
+
"epoch": 0.26,
|
| 1618 |
+
"learning_rate": 8.820758304372557e-05,
|
| 1619 |
+
"loss": 0.8893,
|
| 1620 |
+
"step": 262000
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"epoch": 0.26,
|
| 1624 |
+
"learning_rate": 8.810071946411989e-05,
|
| 1625 |
+
"loss": 0.8922,
|
| 1626 |
+
"step": 263000
|
| 1627 |
+
},
|
| 1628 |
+
{
|
| 1629 |
+
"epoch": 0.26,
|
| 1630 |
+
"learning_rate": 8.799343922115044e-05,
|
| 1631 |
+
"loss": 0.8923,
|
| 1632 |
+
"step": 264000
|
| 1633 |
+
},
|
| 1634 |
+
{
|
| 1635 |
+
"epoch": 0.27,
|
| 1636 |
+
"learning_rate": 8.788574348801675e-05,
|
| 1637 |
+
"loss": 0.8796,
|
| 1638 |
+
"step": 265000
|
| 1639 |
+
},
|
| 1640 |
+
{
|
| 1641 |
+
"epoch": 0.27,
|
| 1642 |
+
"learning_rate": 8.77776334424621e-05,
|
| 1643 |
+
"loss": 0.8891,
|
| 1644 |
+
"step": 266000
|
| 1645 |
+
},
|
| 1646 |
+
{
|
| 1647 |
+
"epoch": 0.27,
|
| 1648 |
+
"learning_rate": 8.766911026676064e-05,
|
| 1649 |
+
"loss": 0.8904,
|
| 1650 |
+
"step": 267000
|
| 1651 |
+
},
|
| 1652 |
+
{
|
| 1653 |
+
"epoch": 1.0,
|
| 1654 |
+
"learning_rate": 8.756017514770443e-05,
|
| 1655 |
+
"loss": 0.8777,
|
| 1656 |
+
"step": 268000
|
| 1657 |
+
},
|
| 1658 |
+
{
|
| 1659 |
+
"epoch": 1.0,
|
| 1660 |
+
"learning_rate": 8.745082927659047e-05,
|
| 1661 |
+
"loss": 0.8829,
|
| 1662 |
+
"step": 269000
|
| 1663 |
+
},
|
| 1664 |
+
{
|
| 1665 |
+
"epoch": 1.0,
|
| 1666 |
+
"learning_rate": 8.73410738492077e-05,
|
| 1667 |
+
"loss": 0.8968,
|
| 1668 |
+
"step": 270000
|
| 1669 |
+
},
|
| 1670 |
+
{
|
| 1671 |
+
"epoch": 1.0,
|
| 1672 |
+
"learning_rate": 8.723091006582389e-05,
|
| 1673 |
+
"loss": 0.9007,
|
| 1674 |
+
"step": 271000
|
| 1675 |
+
},
|
| 1676 |
+
{
|
| 1677 |
+
"epoch": 1.0,
|
| 1678 |
+
"learning_rate": 8.71203391311725e-05,
|
| 1679 |
+
"loss": 0.9072,
|
| 1680 |
+
"step": 272000
|
| 1681 |
+
},
|
| 1682 |
+
{
|
| 1683 |
+
"epoch": 1.01,
|
| 1684 |
+
"learning_rate": 8.700936225443959e-05,
|
| 1685 |
+
"loss": 0.9152,
|
| 1686 |
+
"step": 273000
|
| 1687 |
+
},
|
| 1688 |
+
{
|
| 1689 |
+
"epoch": 1.01,
|
| 1690 |
+
"learning_rate": 8.689798064925049e-05,
|
| 1691 |
+
"loss": 0.919,
|
| 1692 |
+
"step": 274000
|
| 1693 |
+
},
|
| 1694 |
+
{
|
| 1695 |
+
"epoch": 1.01,
|
| 1696 |
+
"learning_rate": 8.678619553365659e-05,
|
| 1697 |
+
"loss": 0.9189,
|
| 1698 |
+
"step": 275000
|
| 1699 |
+
},
|
| 1700 |
+
{
|
| 1701 |
+
"epoch": 1.01,
|
| 1702 |
+
"learning_rate": 8.6674008130122e-05,
|
| 1703 |
+
"loss": 0.9009,
|
| 1704 |
+
"step": 276000
|
| 1705 |
+
},
|
| 1706 |
+
{
|
| 1707 |
+
"epoch": 1.01,
|
| 1708 |
+
"learning_rate": 8.656141966551019e-05,
|
| 1709 |
+
"loss": 0.9005,
|
| 1710 |
+
"step": 277000
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"epoch": 1.01,
|
| 1714 |
+
"learning_rate": 8.644843137107059e-05,
|
| 1715 |
+
"loss": 0.9028,
|
| 1716 |
+
"step": 278000
|
| 1717 |
+
},
|
| 1718 |
+
{
|
| 1719 |
+
"epoch": 1.01,
|
| 1720 |
+
"learning_rate": 8.633504448242505e-05,
|
| 1721 |
+
"loss": 0.8946,
|
| 1722 |
+
"step": 279000
|
| 1723 |
+
},
|
| 1724 |
+
{
|
| 1725 |
+
"epoch": 1.01,
|
| 1726 |
+
"learning_rate": 8.622126023955446e-05,
|
| 1727 |
+
"loss": 0.9021,
|
| 1728 |
+
"step": 280000
|
| 1729 |
+
},
|
| 1730 |
+
{
|
| 1731 |
+
"epoch": 1.01,
|
| 1732 |
+
"learning_rate": 8.610707988678503e-05,
|
| 1733 |
+
"loss": 0.8995,
|
| 1734 |
+
"step": 281000
|
| 1735 |
+
},
|
| 1736 |
+
{
|
| 1737 |
+
"epoch": 1.01,
|
| 1738 |
+
"learning_rate": 8.599250467277483e-05,
|
| 1739 |
+
"loss": 1.1865,
|
| 1740 |
+
"step": 282000
|
| 1741 |
+
},
|
| 1742 |
+
{
|
| 1743 |
+
"epoch": 1.02,
|
| 1744 |
+
"learning_rate": 8.587753585050004e-05,
|
| 1745 |
+
"loss": 0.9683,
|
| 1746 |
+
"step": 283000
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"epoch": 1.02,
|
| 1750 |
+
"learning_rate": 8.576217467724128e-05,
|
| 1751 |
+
"loss": 0.9102,
|
| 1752 |
+
"step": 284000
|
| 1753 |
+
},
|
| 1754 |
+
{
|
| 1755 |
+
"epoch": 1.02,
|
| 1756 |
+
"learning_rate": 8.564642241456986e-05,
|
| 1757 |
+
"loss": 0.9166,
|
| 1758 |
+
"step": 285000
|
| 1759 |
+
},
|
| 1760 |
+
{
|
| 1761 |
+
"epoch": 1.02,
|
| 1762 |
+
"learning_rate": 8.553028032833397e-05,
|
| 1763 |
+
"loss": 0.9031,
|
| 1764 |
+
"step": 286000
|
| 1765 |
+
},
|
| 1766 |
+
{
|
| 1767 |
+
"epoch": 1.02,
|
| 1768 |
+
"learning_rate": 8.541374968864487e-05,
|
| 1769 |
+
"loss": 0.9031,
|
| 1770 |
+
"step": 287000
|
| 1771 |
+
},
|
| 1772 |
+
{
|
| 1773 |
+
"epoch": 1.02,
|
| 1774 |
+
"learning_rate": 8.529683176986295e-05,
|
| 1775 |
+
"loss": 0.9015,
|
| 1776 |
+
"step": 288000
|
| 1777 |
+
},
|
| 1778 |
+
{
|
| 1779 |
+
"epoch": 1.02,
|
| 1780 |
+
"learning_rate": 8.517952785058385e-05,
|
| 1781 |
+
"loss": 0.9023,
|
| 1782 |
+
"step": 289000
|
| 1783 |
+
},
|
| 1784 |
+
{
|
| 1785 |
+
"epoch": 1.02,
|
| 1786 |
+
"learning_rate": 8.506183921362443e-05,
|
| 1787 |
+
"loss": 0.8973,
|
| 1788 |
+
"step": 290000
|
| 1789 |
+
},
|
| 1790 |
+
{
|
| 1791 |
+
"epoch": 1.02,
|
| 1792 |
+
"learning_rate": 8.494376714600878e-05,
|
| 1793 |
+
"loss": 0.8804,
|
| 1794 |
+
"step": 291000
|
| 1795 |
+
},
|
| 1796 |
+
{
|
| 1797 |
+
"epoch": 1.02,
|
| 1798 |
+
"learning_rate": 8.482531293895412e-05,
|
| 1799 |
+
"loss": 0.8831,
|
| 1800 |
+
"step": 292000
|
| 1801 |
+
},
|
| 1802 |
+
{
|
| 1803 |
+
"epoch": 1.03,
|
| 1804 |
+
"learning_rate": 8.470647788785665e-05,
|
| 1805 |
+
"loss": 0.8951,
|
| 1806 |
+
"step": 293000
|
| 1807 |
+
},
|
| 1808 |
+
{
|
| 1809 |
+
"epoch": 1.03,
|
| 1810 |
+
"learning_rate": 8.458726329227747e-05,
|
| 1811 |
+
"loss": 0.8957,
|
| 1812 |
+
"step": 294000
|
| 1813 |
+
},
|
| 1814 |
+
{
|
| 1815 |
+
"epoch": 1.03,
|
| 1816 |
+
"learning_rate": 8.44676704559283e-05,
|
| 1817 |
+
"loss": 0.8782,
|
| 1818 |
+
"step": 295000
|
| 1819 |
+
},
|
| 1820 |
+
{
|
| 1821 |
+
"epoch": 1.03,
|
| 1822 |
+
"learning_rate": 8.434770068665723e-05,
|
| 1823 |
+
"loss": 0.8838,
|
| 1824 |
+
"step": 296000
|
| 1825 |
+
},
|
| 1826 |
+
{
|
| 1827 |
+
"epoch": 1.03,
|
| 1828 |
+
"learning_rate": 8.422735529643444e-05,
|
| 1829 |
+
"loss": 0.8902,
|
| 1830 |
+
"step": 297000
|
| 1831 |
+
},
|
| 1832 |
+
{
|
| 1833 |
+
"epoch": 1.03,
|
| 1834 |
+
"learning_rate": 8.410663560133784e-05,
|
| 1835 |
+
"loss": 0.8823,
|
| 1836 |
+
"step": 298000
|
| 1837 |
+
},
|
| 1838 |
+
{
|
| 1839 |
+
"epoch": 1.03,
|
| 1840 |
+
"learning_rate": 8.398554292153866e-05,
|
| 1841 |
+
"loss": 0.8785,
|
| 1842 |
+
"step": 299000
|
| 1843 |
+
},
|
| 1844 |
+
{
|
| 1845 |
+
"epoch": 1.03,
|
| 1846 |
+
"learning_rate": 8.386407858128706e-05,
|
| 1847 |
+
"loss": 0.8813,
|
| 1848 |
+
"step": 300000
|
| 1849 |
+
},
|
| 1850 |
+
{
|
| 1851 |
+
"epoch": 1.03,
|
| 1852 |
+
"eval_loss": 0.8203372955322266,
|
| 1853 |
+
"eval_runtime": 20.3044,
|
| 1854 |
+
"eval_samples_per_second": 492.503,
|
| 1855 |
+
"eval_steps_per_second": 1.97,
|
| 1856 |
+
"step": 300000
|
| 1857 |
}
|
| 1858 |
],
|
| 1859 |
"max_steps": 1000000,
|
| 1860 |
"num_train_epochs": 9223372036854775807,
|
| 1861 |
+
"total_flos": 5.0546812649472e+18,
|
| 1862 |
"trial_name": null,
|
| 1863 |
"trial_params": null
|
| 1864 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498046827
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2517996d5c4c4163884506060e457650dff4618ff3814b55dc92b5b3c209528
|
| 3 |
size 498046827
|
runs/Nov11_11-14-20_t1v-n-088af867-w-0/events.out.tfevents.1668165319.t1v-n-088af867-w-0.346597.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4c444c4e8e903f12d92ca730d51afacf1008416ca703be4c9290ba2b7d54fe2
|
| 3 |
+
size 53355
|