somaia02 commited on
Commit
936b4f2
·
1 Parent(s): 4b89801

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:264ecec36fa0a7877a74e5bc90de5cfe426844837e851cd04e19bd3e592e07d9
3
  size 5323528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa4cf5ab1fb087d029d4381bb5e28a055d9c817a87979d4b736175862d080f9b
3
  size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9825a8978c13d22cdcc9972f981eca76a202db226c336284f26d0d6ae781e227
3
  size 10707706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6572beb7bcdb366917f4d9b45d34f2485f10958107a6631df832abba4e4d9c9c
3
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d122c28c5fc3f53d9d112345e4abc97c9cbf42b6d6c3da282213a9e89d67c386
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ff46ad6a9496f04d82a8799741e7a0a0edd57f04423aee3b129229a16b468b4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c35dfe7906d79114431a0065ffa36a90ef274205cc0fefa7802dd197d609956
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a929be8c5a8f87235cd3c7ec1fa0db3faf4d300da6627139836df2b8a53bd9f3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.45563551783561707,
3
- "best_model_checkpoint": "bart_lora_outputs\\checkpoint-1500",
4
- "epoch": 3.262642740619902,
5
  "eval_steps": 100,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1367,13 +1367,353 @@
1367
  "eval_samples_per_second": 90.523,
1368
  "eval_steps_per_second": 11.364,
1369
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1370
  }
1371
  ],
1372
  "logging_steps": 10,
1373
  "max_steps": 6130,
1374
  "num_train_epochs": 10,
1375
  "save_steps": 500,
1376
- "total_flos": 3778253218971648.0,
1377
  "trial_name": null,
1378
  "trial_params": null
1379
  }
 
1
  {
2
+ "best_metric": 0.4399421811103821,
3
+ "best_model_checkpoint": "bart_lora_outputs\\checkpoint-2500",
4
+ "epoch": 4.078303425774878,
5
  "eval_steps": 100,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1367
  "eval_samples_per_second": 90.523,
1368
  "eval_steps_per_second": 11.364,
1369
  "step": 2000
1370
+ },
1371
+ {
1372
+ "epoch": 3.28,
1373
+ "learning_rate": 0.0007317939609236234,
1374
+ "loss": 0.4824,
1375
+ "step": 2010
1376
+ },
1377
+ {
1378
+ "epoch": 3.3,
1379
+ "learning_rate": 0.0007300177619893427,
1380
+ "loss": 0.4741,
1381
+ "step": 2020
1382
+ },
1383
+ {
1384
+ "epoch": 3.31,
1385
+ "learning_rate": 0.0007282415630550621,
1386
+ "loss": 0.4649,
1387
+ "step": 2030
1388
+ },
1389
+ {
1390
+ "epoch": 3.33,
1391
+ "learning_rate": 0.0007264653641207815,
1392
+ "loss": 0.471,
1393
+ "step": 2040
1394
+ },
1395
+ {
1396
+ "epoch": 3.34,
1397
+ "learning_rate": 0.0007246891651865008,
1398
+ "loss": 0.4411,
1399
+ "step": 2050
1400
+ },
1401
+ {
1402
+ "epoch": 3.36,
1403
+ "learning_rate": 0.0007229129662522202,
1404
+ "loss": 0.4435,
1405
+ "step": 2060
1406
+ },
1407
+ {
1408
+ "epoch": 3.38,
1409
+ "learning_rate": 0.0007211367673179397,
1410
+ "loss": 0.4464,
1411
+ "step": 2070
1412
+ },
1413
+ {
1414
+ "epoch": 3.39,
1415
+ "learning_rate": 0.000719360568383659,
1416
+ "loss": 0.4268,
1417
+ "step": 2080
1418
+ },
1419
+ {
1420
+ "epoch": 3.41,
1421
+ "learning_rate": 0.0007175843694493784,
1422
+ "loss": 0.4648,
1423
+ "step": 2090
1424
+ },
1425
+ {
1426
+ "epoch": 3.43,
1427
+ "learning_rate": 0.0007158081705150978,
1428
+ "loss": 0.4746,
1429
+ "step": 2100
1430
+ },
1431
+ {
1432
+ "epoch": 3.43,
1433
+ "eval_loss": 0.44933727383613586,
1434
+ "eval_runtime": 12.9431,
1435
+ "eval_samples_per_second": 90.473,
1436
+ "eval_steps_per_second": 11.357,
1437
+ "step": 2100
1438
+ },
1439
+ {
1440
+ "epoch": 3.44,
1441
+ "learning_rate": 0.0007140319715808172,
1442
+ "loss": 0.4758,
1443
+ "step": 2110
1444
+ },
1445
+ {
1446
+ "epoch": 3.46,
1447
+ "learning_rate": 0.0007122557726465364,
1448
+ "loss": 0.4585,
1449
+ "step": 2120
1450
+ },
1451
+ {
1452
+ "epoch": 3.47,
1453
+ "learning_rate": 0.0007104795737122558,
1454
+ "loss": 0.4386,
1455
+ "step": 2130
1456
+ },
1457
+ {
1458
+ "epoch": 3.49,
1459
+ "learning_rate": 0.0007087033747779752,
1460
+ "loss": 0.4575,
1461
+ "step": 2140
1462
+ },
1463
+ {
1464
+ "epoch": 3.51,
1465
+ "learning_rate": 0.0007069271758436945,
1466
+ "loss": 0.46,
1467
+ "step": 2150
1468
+ },
1469
+ {
1470
+ "epoch": 3.52,
1471
+ "learning_rate": 0.0007051509769094139,
1472
+ "loss": 0.4529,
1473
+ "step": 2160
1474
+ },
1475
+ {
1476
+ "epoch": 3.54,
1477
+ "learning_rate": 0.0007033747779751333,
1478
+ "loss": 0.4723,
1479
+ "step": 2170
1480
+ },
1481
+ {
1482
+ "epoch": 3.56,
1483
+ "learning_rate": 0.0007015985790408526,
1484
+ "loss": 0.47,
1485
+ "step": 2180
1486
+ },
1487
+ {
1488
+ "epoch": 3.57,
1489
+ "learning_rate": 0.000699822380106572,
1490
+ "loss": 0.4952,
1491
+ "step": 2190
1492
+ },
1493
+ {
1494
+ "epoch": 3.59,
1495
+ "learning_rate": 0.0006980461811722914,
1496
+ "loss": 0.4488,
1497
+ "step": 2200
1498
+ },
1499
+ {
1500
+ "epoch": 3.59,
1501
+ "eval_loss": 0.43646687269210815,
1502
+ "eval_runtime": 13.2627,
1503
+ "eval_samples_per_second": 88.293,
1504
+ "eval_steps_per_second": 11.084,
1505
+ "step": 2200
1506
+ },
1507
+ {
1508
+ "epoch": 3.61,
1509
+ "learning_rate": 0.0006962699822380107,
1510
+ "loss": 0.4558,
1511
+ "step": 2210
1512
+ },
1513
+ {
1514
+ "epoch": 3.62,
1515
+ "learning_rate": 0.0006944937833037301,
1516
+ "loss": 0.4657,
1517
+ "step": 2220
1518
+ },
1519
+ {
1520
+ "epoch": 3.64,
1521
+ "learning_rate": 0.0006927175843694495,
1522
+ "loss": 0.4671,
1523
+ "step": 2230
1524
+ },
1525
+ {
1526
+ "epoch": 3.65,
1527
+ "learning_rate": 0.0006909413854351687,
1528
+ "loss": 0.5003,
1529
+ "step": 2240
1530
+ },
1531
+ {
1532
+ "epoch": 3.67,
1533
+ "learning_rate": 0.0006891651865008881,
1534
+ "loss": 0.4523,
1535
+ "step": 2250
1536
+ },
1537
+ {
1538
+ "epoch": 3.69,
1539
+ "learning_rate": 0.0006873889875666075,
1540
+ "loss": 0.4466,
1541
+ "step": 2260
1542
+ },
1543
+ {
1544
+ "epoch": 3.7,
1545
+ "learning_rate": 0.0006856127886323268,
1546
+ "loss": 0.4448,
1547
+ "step": 2270
1548
+ },
1549
+ {
1550
+ "epoch": 3.72,
1551
+ "learning_rate": 0.0006838365896980462,
1552
+ "loss": 0.4491,
1553
+ "step": 2280
1554
+ },
1555
+ {
1556
+ "epoch": 3.74,
1557
+ "learning_rate": 0.0006820603907637656,
1558
+ "loss": 0.4719,
1559
+ "step": 2290
1560
+ },
1561
+ {
1562
+ "epoch": 3.75,
1563
+ "learning_rate": 0.0006802841918294849,
1564
+ "loss": 0.4742,
1565
+ "step": 2300
1566
+ },
1567
+ {
1568
+ "epoch": 3.75,
1569
+ "eval_loss": 0.43938902020454407,
1570
+ "eval_runtime": 13.1019,
1571
+ "eval_samples_per_second": 89.376,
1572
+ "eval_steps_per_second": 11.22,
1573
+ "step": 2300
1574
+ },
1575
+ {
1576
+ "epoch": 3.77,
1577
+ "learning_rate": 0.0006785079928952043,
1578
+ "loss": 0.4709,
1579
+ "step": 2310
1580
+ },
1581
+ {
1582
+ "epoch": 3.78,
1583
+ "learning_rate": 0.0006767317939609237,
1584
+ "loss": 0.4575,
1585
+ "step": 2320
1586
+ },
1587
+ {
1588
+ "epoch": 3.8,
1589
+ "learning_rate": 0.000674955595026643,
1590
+ "loss": 0.4688,
1591
+ "step": 2330
1592
+ },
1593
+ {
1594
+ "epoch": 3.82,
1595
+ "learning_rate": 0.0006731793960923623,
1596
+ "loss": 0.4552,
1597
+ "step": 2340
1598
+ },
1599
+ {
1600
+ "epoch": 3.83,
1601
+ "learning_rate": 0.0006714031971580817,
1602
+ "loss": 0.4515,
1603
+ "step": 2350
1604
+ },
1605
+ {
1606
+ "epoch": 3.85,
1607
+ "learning_rate": 0.0006696269982238011,
1608
+ "loss": 0.455,
1609
+ "step": 2360
1610
+ },
1611
+ {
1612
+ "epoch": 3.87,
1613
+ "learning_rate": 0.0006678507992895204,
1614
+ "loss": 0.4465,
1615
+ "step": 2370
1616
+ },
1617
+ {
1618
+ "epoch": 3.88,
1619
+ "learning_rate": 0.0006660746003552398,
1620
+ "loss": 0.479,
1621
+ "step": 2380
1622
+ },
1623
+ {
1624
+ "epoch": 3.9,
1625
+ "learning_rate": 0.0006642984014209592,
1626
+ "loss": 0.4492,
1627
+ "step": 2390
1628
+ },
1629
+ {
1630
+ "epoch": 3.92,
1631
+ "learning_rate": 0.0006625222024866785,
1632
+ "loss": 0.4465,
1633
+ "step": 2400
1634
+ },
1635
+ {
1636
+ "epoch": 3.92,
1637
+ "eval_loss": 0.4411802589893341,
1638
+ "eval_runtime": 12.9494,
1639
+ "eval_samples_per_second": 90.429,
1640
+ "eval_steps_per_second": 11.352,
1641
+ "step": 2400
1642
+ },
1643
+ {
1644
+ "epoch": 3.93,
1645
+ "learning_rate": 0.0006607460035523979,
1646
+ "loss": 0.4851,
1647
+ "step": 2410
1648
+ },
1649
+ {
1650
+ "epoch": 3.95,
1651
+ "learning_rate": 0.0006589698046181173,
1652
+ "loss": 0.4414,
1653
+ "step": 2420
1654
+ },
1655
+ {
1656
+ "epoch": 3.96,
1657
+ "learning_rate": 0.0006571936056838366,
1658
+ "loss": 0.4333,
1659
+ "step": 2430
1660
+ },
1661
+ {
1662
+ "epoch": 3.98,
1663
+ "learning_rate": 0.000655417406749556,
1664
+ "loss": 0.4733,
1665
+ "step": 2440
1666
+ },
1667
+ {
1668
+ "epoch": 4.0,
1669
+ "learning_rate": 0.0006536412078152753,
1670
+ "loss": 0.4667,
1671
+ "step": 2450
1672
+ },
1673
+ {
1674
+ "epoch": 4.01,
1675
+ "learning_rate": 0.0006518650088809946,
1676
+ "loss": 0.4381,
1677
+ "step": 2460
1678
+ },
1679
+ {
1680
+ "epoch": 4.03,
1681
+ "learning_rate": 0.000650088809946714,
1682
+ "loss": 0.4267,
1683
+ "step": 2470
1684
+ },
1685
+ {
1686
+ "epoch": 4.05,
1687
+ "learning_rate": 0.0006483126110124334,
1688
+ "loss": 0.4361,
1689
+ "step": 2480
1690
+ },
1691
+ {
1692
+ "epoch": 4.06,
1693
+ "learning_rate": 0.0006465364120781527,
1694
+ "loss": 0.4636,
1695
+ "step": 2490
1696
+ },
1697
+ {
1698
+ "epoch": 4.08,
1699
+ "learning_rate": 0.0006447602131438721,
1700
+ "loss": 0.4578,
1701
+ "step": 2500
1702
+ },
1703
+ {
1704
+ "epoch": 4.08,
1705
+ "eval_loss": 0.4399421811103821,
1706
+ "eval_runtime": 13.1521,
1707
+ "eval_samples_per_second": 89.035,
1708
+ "eval_steps_per_second": 11.177,
1709
+ "step": 2500
1710
  }
1711
  ],
1712
  "logging_steps": 10,
1713
  "max_steps": 6130,
1714
  "num_train_epochs": 10,
1715
  "save_steps": 500,
1716
+ "total_flos": 4719092173406208.0,
1717
  "trial_name": null,
1718
  "trial_params": null
1719
  }