somaia02 commited on
Commit
58f60dd
·
1 Parent(s): 994b899

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdb65aad7f5881ab6c4f55f41fa5aa18d8451e3142560b840323390718fbf1c0
3
  size 5323528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9835ad6bb2b08ae316f9f11b0e8d1d3ebd81d56c95ce6836b93250c69a0da6c1
3
  size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed6c83118141e829ba55ae59bd3661bdfd7b741a28a938857f067ee13bda6e1f
3
  size 10707706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d58d2d3f9f8d82b76b4b01cc178bb373ee5e5c36fe33fa652a1d1f1d22485e65
3
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cd6b45f83835221dcdf23f243180950e962516b14dd7ff28fbb69bb83387d6c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5ed3dd25bda0e52429ee4870ac2ba7e5a4f0851368f27ee87285fe0a5714834
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15c59c2a801a017f8138f2d9ac3b94589723ddfbbf1ae570418aa9bfac089535
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f019f73cf3f7accb65e9b564bf8a83e5db913dbd19c9517ca220494d620a381
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4271390736103058,
3
- "best_model_checkpoint": "bart_lora_outputs\\checkpoint-3000",
4
- "epoch": 5.709624796084829,
5
  "eval_steps": 100,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2387,13 +2387,353 @@
2387
  "eval_samples_per_second": 90.231,
2388
  "eval_steps_per_second": 11.327,
2389
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2390
  }
2391
  ],
2392
  "logging_steps": 10,
2393
  "max_steps": 6130,
2394
  "num_train_epochs": 10,
2395
  "save_steps": 500,
2396
- "total_flos": 6614658553872384.0,
2397
  "trial_name": null,
2398
  "trial_params": null
2399
  }
 
1
  {
2
+ "best_metric": 0.42139747738838196,
3
+ "best_model_checkpoint": "bart_lora_outputs\\checkpoint-4000",
4
+ "epoch": 6.525285481239804,
5
  "eval_steps": 100,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2387
  "eval_samples_per_second": 90.231,
2388
  "eval_steps_per_second": 11.327,
2389
  "step": 3500
2390
+ },
2391
+ {
2392
+ "epoch": 5.73,
2393
+ "learning_rate": 0.0004653641207815276,
2394
+ "loss": 0.4377,
2395
+ "step": 3510
2396
+ },
2397
+ {
2398
+ "epoch": 5.74,
2399
+ "learning_rate": 0.0004635879218472469,
2400
+ "loss": 0.4315,
2401
+ "step": 3520
2402
+ },
2403
+ {
2404
+ "epoch": 5.76,
2405
+ "learning_rate": 0.00046181172291296627,
2406
+ "loss": 0.4392,
2407
+ "step": 3530
2408
+ },
2409
+ {
2410
+ "epoch": 5.77,
2411
+ "learning_rate": 0.00046003552397868566,
2412
+ "loss": 0.4151,
2413
+ "step": 3540
2414
+ },
2415
+ {
2416
+ "epoch": 5.79,
2417
+ "learning_rate": 0.000458259325044405,
2418
+ "loss": 0.3884,
2419
+ "step": 3550
2420
+ },
2421
+ {
2422
+ "epoch": 5.81,
2423
+ "learning_rate": 0.00045648312611012434,
2424
+ "loss": 0.4362,
2425
+ "step": 3560
2426
+ },
2427
+ {
2428
+ "epoch": 5.82,
2429
+ "learning_rate": 0.00045470692717584373,
2430
+ "loss": 0.4401,
2431
+ "step": 3570
2432
+ },
2433
+ {
2434
+ "epoch": 5.84,
2435
+ "learning_rate": 0.00045293072824156307,
2436
+ "loss": 0.4199,
2437
+ "step": 3580
2438
+ },
2439
+ {
2440
+ "epoch": 5.86,
2441
+ "learning_rate": 0.0004511545293072824,
2442
+ "loss": 0.4,
2443
+ "step": 3590
2444
+ },
2445
+ {
2446
+ "epoch": 5.87,
2447
+ "learning_rate": 0.0004493783303730018,
2448
+ "loss": 0.4056,
2449
+ "step": 3600
2450
+ },
2451
+ {
2452
+ "epoch": 5.87,
2453
+ "eval_loss": 0.42200714349746704,
2454
+ "eval_runtime": 13.1531,
2455
+ "eval_samples_per_second": 89.029,
2456
+ "eval_steps_per_second": 11.176,
2457
+ "step": 3600
2458
+ },
2459
+ {
2460
+ "epoch": 5.89,
2461
+ "learning_rate": 0.00044760213143872114,
2462
+ "loss": 0.4444,
2463
+ "step": 3610
2464
+ },
2465
+ {
2466
+ "epoch": 5.91,
2467
+ "learning_rate": 0.0004458259325044405,
2468
+ "loss": 0.4229,
2469
+ "step": 3620
2470
+ },
2471
+ {
2472
+ "epoch": 5.92,
2473
+ "learning_rate": 0.0004440497335701599,
2474
+ "loss": 0.4206,
2475
+ "step": 3630
2476
+ },
2477
+ {
2478
+ "epoch": 5.94,
2479
+ "learning_rate": 0.0004422735346358792,
2480
+ "loss": 0.4243,
2481
+ "step": 3640
2482
+ },
2483
+ {
2484
+ "epoch": 5.95,
2485
+ "learning_rate": 0.0004404973357015986,
2486
+ "loss": 0.4117,
2487
+ "step": 3650
2488
+ },
2489
+ {
2490
+ "epoch": 5.97,
2491
+ "learning_rate": 0.00043872113676731795,
2492
+ "loss": 0.4375,
2493
+ "step": 3660
2494
+ },
2495
+ {
2496
+ "epoch": 5.99,
2497
+ "learning_rate": 0.0004369449378330373,
2498
+ "loss": 0.4372,
2499
+ "step": 3670
2500
+ },
2501
+ {
2502
+ "epoch": 6.0,
2503
+ "learning_rate": 0.0004351687388987567,
2504
+ "loss": 0.4045,
2505
+ "step": 3680
2506
+ },
2507
+ {
2508
+ "epoch": 6.02,
2509
+ "learning_rate": 0.000433392539964476,
2510
+ "loss": 0.3872,
2511
+ "step": 3690
2512
+ },
2513
+ {
2514
+ "epoch": 6.04,
2515
+ "learning_rate": 0.00043161634103019536,
2516
+ "loss": 0.4049,
2517
+ "step": 3700
2518
+ },
2519
+ {
2520
+ "epoch": 6.04,
2521
+ "eval_loss": 0.4243859648704529,
2522
+ "eval_runtime": 12.9291,
2523
+ "eval_samples_per_second": 90.571,
2524
+ "eval_steps_per_second": 11.37,
2525
+ "step": 3700
2526
+ },
2527
+ {
2528
+ "epoch": 6.05,
2529
+ "learning_rate": 0.00042984014209591475,
2530
+ "loss": 0.4333,
2531
+ "step": 3710
2532
+ },
2533
+ {
2534
+ "epoch": 6.07,
2535
+ "learning_rate": 0.0004280639431616341,
2536
+ "loss": 0.4061,
2537
+ "step": 3720
2538
+ },
2539
+ {
2540
+ "epoch": 6.08,
2541
+ "learning_rate": 0.00042628774422735343,
2542
+ "loss": 0.3993,
2543
+ "step": 3730
2544
+ },
2545
+ {
2546
+ "epoch": 6.1,
2547
+ "learning_rate": 0.0004245115452930728,
2548
+ "loss": 0.3988,
2549
+ "step": 3740
2550
+ },
2551
+ {
2552
+ "epoch": 6.12,
2553
+ "learning_rate": 0.00042273534635879216,
2554
+ "loss": 0.3962,
2555
+ "step": 3750
2556
+ },
2557
+ {
2558
+ "epoch": 6.13,
2559
+ "learning_rate": 0.00042095914742451156,
2560
+ "loss": 0.3863,
2561
+ "step": 3760
2562
+ },
2563
+ {
2564
+ "epoch": 6.15,
2565
+ "learning_rate": 0.0004191829484902309,
2566
+ "loss": 0.4184,
2567
+ "step": 3770
2568
+ },
2569
+ {
2570
+ "epoch": 6.17,
2571
+ "learning_rate": 0.00041740674955595023,
2572
+ "loss": 0.4171,
2573
+ "step": 3780
2574
+ },
2575
+ {
2576
+ "epoch": 6.18,
2577
+ "learning_rate": 0.0004156305506216697,
2578
+ "loss": 0.416,
2579
+ "step": 3790
2580
+ },
2581
+ {
2582
+ "epoch": 6.2,
2583
+ "learning_rate": 0.000413854351687389,
2584
+ "loss": 0.4311,
2585
+ "step": 3800
2586
+ },
2587
+ {
2588
+ "epoch": 6.2,
2589
+ "eval_loss": 0.42171674966812134,
2590
+ "eval_runtime": 13.1531,
2591
+ "eval_samples_per_second": 89.029,
2592
+ "eval_steps_per_second": 11.176,
2593
+ "step": 3800
2594
+ },
2595
+ {
2596
+ "epoch": 6.22,
2597
+ "learning_rate": 0.00041207815275310836,
2598
+ "loss": 0.4266,
2599
+ "step": 3810
2600
+ },
2601
+ {
2602
+ "epoch": 6.23,
2603
+ "learning_rate": 0.00041030195381882775,
2604
+ "loss": 0.3933,
2605
+ "step": 3820
2606
+ },
2607
+ {
2608
+ "epoch": 6.25,
2609
+ "learning_rate": 0.0004085257548845471,
2610
+ "loss": 0.4154,
2611
+ "step": 3830
2612
+ },
2613
+ {
2614
+ "epoch": 6.26,
2615
+ "learning_rate": 0.00040674955595026643,
2616
+ "loss": 0.4321,
2617
+ "step": 3840
2618
+ },
2619
+ {
2620
+ "epoch": 6.28,
2621
+ "learning_rate": 0.0004049733570159858,
2622
+ "loss": 0.4338,
2623
+ "step": 3850
2624
+ },
2625
+ {
2626
+ "epoch": 6.3,
2627
+ "learning_rate": 0.00040319715808170517,
2628
+ "loss": 0.4204,
2629
+ "step": 3860
2630
+ },
2631
+ {
2632
+ "epoch": 6.31,
2633
+ "learning_rate": 0.00040142095914742456,
2634
+ "loss": 0.4274,
2635
+ "step": 3870
2636
+ },
2637
+ {
2638
+ "epoch": 6.33,
2639
+ "learning_rate": 0.0003996447602131439,
2640
+ "loss": 0.3985,
2641
+ "step": 3880
2642
+ },
2643
+ {
2644
+ "epoch": 6.35,
2645
+ "learning_rate": 0.00039786856127886324,
2646
+ "loss": 0.4232,
2647
+ "step": 3890
2648
+ },
2649
+ {
2650
+ "epoch": 6.36,
2651
+ "learning_rate": 0.00039609236234458263,
2652
+ "loss": 0.3799,
2653
+ "step": 3900
2654
+ },
2655
+ {
2656
+ "epoch": 6.36,
2657
+ "eval_loss": 0.4199593961238861,
2658
+ "eval_runtime": 12.9106,
2659
+ "eval_samples_per_second": 90.7,
2660
+ "eval_steps_per_second": 11.386,
2661
+ "step": 3900
2662
+ },
2663
+ {
2664
+ "epoch": 6.38,
2665
+ "learning_rate": 0.00039431616341030197,
2666
+ "loss": 0.4061,
2667
+ "step": 3910
2668
+ },
2669
+ {
2670
+ "epoch": 6.39,
2671
+ "learning_rate": 0.0003925399644760213,
2672
+ "loss": 0.398,
2673
+ "step": 3920
2674
+ },
2675
+ {
2676
+ "epoch": 6.41,
2677
+ "learning_rate": 0.0003907637655417407,
2678
+ "loss": 0.3987,
2679
+ "step": 3930
2680
+ },
2681
+ {
2682
+ "epoch": 6.43,
2683
+ "learning_rate": 0.00038898756660746004,
2684
+ "loss": 0.3811,
2685
+ "step": 3940
2686
+ },
2687
+ {
2688
+ "epoch": 6.44,
2689
+ "learning_rate": 0.0003872113676731794,
2690
+ "loss": 0.4133,
2691
+ "step": 3950
2692
+ },
2693
+ {
2694
+ "epoch": 6.46,
2695
+ "learning_rate": 0.0003854351687388988,
2696
+ "loss": 0.4053,
2697
+ "step": 3960
2698
+ },
2699
+ {
2700
+ "epoch": 6.48,
2701
+ "learning_rate": 0.0003836589698046181,
2702
+ "loss": 0.4046,
2703
+ "step": 3970
2704
+ },
2705
+ {
2706
+ "epoch": 6.49,
2707
+ "learning_rate": 0.00038188277087033745,
2708
+ "loss": 0.3875,
2709
+ "step": 3980
2710
+ },
2711
+ {
2712
+ "epoch": 6.51,
2713
+ "learning_rate": 0.00038010657193605685,
2714
+ "loss": 0.4017,
2715
+ "step": 3990
2716
+ },
2717
+ {
2718
+ "epoch": 6.53,
2719
+ "learning_rate": 0.0003783303730017762,
2720
+ "loss": 0.4281,
2721
+ "step": 4000
2722
+ },
2723
+ {
2724
+ "epoch": 6.53,
2725
+ "eval_loss": 0.42139747738838196,
2726
+ "eval_runtime": 13.202,
2727
+ "eval_samples_per_second": 88.699,
2728
+ "eval_steps_per_second": 11.135,
2729
+ "step": 4000
2730
  }
2731
  ],
2732
  "logging_steps": 10,
2733
  "max_steps": 6130,
2734
  "num_train_epochs": 10,
2735
  "save_steps": 500,
2736
+ "total_flos": 7559248409395200.0,
2737
  "trial_name": null,
2738
  "trial_params": null
2739
  }