Instructions to use somaia02/arabart-gec-lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use somaia02/arabart-gec-lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("moussaKam/AraBART")
model = PeftModel.from_pretrained(base_model, "somaia02/arabart-gec-lora")
```
- Notebooks
- Google Colab
- Kaggle
Training in progress, step 4000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5323528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9835ad6bb2b08ae316f9f11b0e8d1d3ebd81d56c95ce6836b93250c69a0da6c1
|
| 3 |
size 5323528
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10707706
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d58d2d3f9f8d82b76b4b01cc178bb373ee5e5c36fe33fa652a1d1f1d22485e65
|
| 3 |
size 10707706
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5ed3dd25bda0e52429ee4870ac2ba7e5a4f0851368f27ee87285fe0a5714834
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f019f73cf3f7accb65e9b564bf8a83e5db913dbd19c9517ca220494d620a381
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "bart_lora_outputs\\checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2387,13 +2387,353 @@
|
|
| 2387 |
"eval_samples_per_second": 90.231,
|
| 2388 |
"eval_steps_per_second": 11.327,
|
| 2389 |
"step": 3500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2390 |
}
|
| 2391 |
],
|
| 2392 |
"logging_steps": 10,
|
| 2393 |
"max_steps": 6130,
|
| 2394 |
"num_train_epochs": 10,
|
| 2395 |
"save_steps": 500,
|
| 2396 |
-
"total_flos":
|
| 2397 |
"trial_name": null,
|
| 2398 |
"trial_params": null
|
| 2399 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.42139747738838196,
|
| 3 |
+
"best_model_checkpoint": "bart_lora_outputs\\checkpoint-4000",
|
| 4 |
+
"epoch": 6.525285481239804,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 4000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2387 |
"eval_samples_per_second": 90.231,
|
| 2388 |
"eval_steps_per_second": 11.327,
|
| 2389 |
"step": 3500
|
| 2390 |
+
},
|
| 2391 |
+
{
|
| 2392 |
+
"epoch": 5.73,
|
| 2393 |
+
"learning_rate": 0.0004653641207815276,
|
| 2394 |
+
"loss": 0.4377,
|
| 2395 |
+
"step": 3510
|
| 2396 |
+
},
|
| 2397 |
+
{
|
| 2398 |
+
"epoch": 5.74,
|
| 2399 |
+
"learning_rate": 0.0004635879218472469,
|
| 2400 |
+
"loss": 0.4315,
|
| 2401 |
+
"step": 3520
|
| 2402 |
+
},
|
| 2403 |
+
{
|
| 2404 |
+
"epoch": 5.76,
|
| 2405 |
+
"learning_rate": 0.00046181172291296627,
|
| 2406 |
+
"loss": 0.4392,
|
| 2407 |
+
"step": 3530
|
| 2408 |
+
},
|
| 2409 |
+
{
|
| 2410 |
+
"epoch": 5.77,
|
| 2411 |
+
"learning_rate": 0.00046003552397868566,
|
| 2412 |
+
"loss": 0.4151,
|
| 2413 |
+
"step": 3540
|
| 2414 |
+
},
|
| 2415 |
+
{
|
| 2416 |
+
"epoch": 5.79,
|
| 2417 |
+
"learning_rate": 0.000458259325044405,
|
| 2418 |
+
"loss": 0.3884,
|
| 2419 |
+
"step": 3550
|
| 2420 |
+
},
|
| 2421 |
+
{
|
| 2422 |
+
"epoch": 5.81,
|
| 2423 |
+
"learning_rate": 0.00045648312611012434,
|
| 2424 |
+
"loss": 0.4362,
|
| 2425 |
+
"step": 3560
|
| 2426 |
+
},
|
| 2427 |
+
{
|
| 2428 |
+
"epoch": 5.82,
|
| 2429 |
+
"learning_rate": 0.00045470692717584373,
|
| 2430 |
+
"loss": 0.4401,
|
| 2431 |
+
"step": 3570
|
| 2432 |
+
},
|
| 2433 |
+
{
|
| 2434 |
+
"epoch": 5.84,
|
| 2435 |
+
"learning_rate": 0.00045293072824156307,
|
| 2436 |
+
"loss": 0.4199,
|
| 2437 |
+
"step": 3580
|
| 2438 |
+
},
|
| 2439 |
+
{
|
| 2440 |
+
"epoch": 5.86,
|
| 2441 |
+
"learning_rate": 0.0004511545293072824,
|
| 2442 |
+
"loss": 0.4,
|
| 2443 |
+
"step": 3590
|
| 2444 |
+
},
|
| 2445 |
+
{
|
| 2446 |
+
"epoch": 5.87,
|
| 2447 |
+
"learning_rate": 0.0004493783303730018,
|
| 2448 |
+
"loss": 0.4056,
|
| 2449 |
+
"step": 3600
|
| 2450 |
+
},
|
| 2451 |
+
{
|
| 2452 |
+
"epoch": 5.87,
|
| 2453 |
+
"eval_loss": 0.42200714349746704,
|
| 2454 |
+
"eval_runtime": 13.1531,
|
| 2455 |
+
"eval_samples_per_second": 89.029,
|
| 2456 |
+
"eval_steps_per_second": 11.176,
|
| 2457 |
+
"step": 3600
|
| 2458 |
+
},
|
| 2459 |
+
{
|
| 2460 |
+
"epoch": 5.89,
|
| 2461 |
+
"learning_rate": 0.00044760213143872114,
|
| 2462 |
+
"loss": 0.4444,
|
| 2463 |
+
"step": 3610
|
| 2464 |
+
},
|
| 2465 |
+
{
|
| 2466 |
+
"epoch": 5.91,
|
| 2467 |
+
"learning_rate": 0.0004458259325044405,
|
| 2468 |
+
"loss": 0.4229,
|
| 2469 |
+
"step": 3620
|
| 2470 |
+
},
|
| 2471 |
+
{
|
| 2472 |
+
"epoch": 5.92,
|
| 2473 |
+
"learning_rate": 0.0004440497335701599,
|
| 2474 |
+
"loss": 0.4206,
|
| 2475 |
+
"step": 3630
|
| 2476 |
+
},
|
| 2477 |
+
{
|
| 2478 |
+
"epoch": 5.94,
|
| 2479 |
+
"learning_rate": 0.0004422735346358792,
|
| 2480 |
+
"loss": 0.4243,
|
| 2481 |
+
"step": 3640
|
| 2482 |
+
},
|
| 2483 |
+
{
|
| 2484 |
+
"epoch": 5.95,
|
| 2485 |
+
"learning_rate": 0.0004404973357015986,
|
| 2486 |
+
"loss": 0.4117,
|
| 2487 |
+
"step": 3650
|
| 2488 |
+
},
|
| 2489 |
+
{
|
| 2490 |
+
"epoch": 5.97,
|
| 2491 |
+
"learning_rate": 0.00043872113676731795,
|
| 2492 |
+
"loss": 0.4375,
|
| 2493 |
+
"step": 3660
|
| 2494 |
+
},
|
| 2495 |
+
{
|
| 2496 |
+
"epoch": 5.99,
|
| 2497 |
+
"learning_rate": 0.0004369449378330373,
|
| 2498 |
+
"loss": 0.4372,
|
| 2499 |
+
"step": 3670
|
| 2500 |
+
},
|
| 2501 |
+
{
|
| 2502 |
+
"epoch": 6.0,
|
| 2503 |
+
"learning_rate": 0.0004351687388987567,
|
| 2504 |
+
"loss": 0.4045,
|
| 2505 |
+
"step": 3680
|
| 2506 |
+
},
|
| 2507 |
+
{
|
| 2508 |
+
"epoch": 6.02,
|
| 2509 |
+
"learning_rate": 0.000433392539964476,
|
| 2510 |
+
"loss": 0.3872,
|
| 2511 |
+
"step": 3690
|
| 2512 |
+
},
|
| 2513 |
+
{
|
| 2514 |
+
"epoch": 6.04,
|
| 2515 |
+
"learning_rate": 0.00043161634103019536,
|
| 2516 |
+
"loss": 0.4049,
|
| 2517 |
+
"step": 3700
|
| 2518 |
+
},
|
| 2519 |
+
{
|
| 2520 |
+
"epoch": 6.04,
|
| 2521 |
+
"eval_loss": 0.4243859648704529,
|
| 2522 |
+
"eval_runtime": 12.9291,
|
| 2523 |
+
"eval_samples_per_second": 90.571,
|
| 2524 |
+
"eval_steps_per_second": 11.37,
|
| 2525 |
+
"step": 3700
|
| 2526 |
+
},
|
| 2527 |
+
{
|
| 2528 |
+
"epoch": 6.05,
|
| 2529 |
+
"learning_rate": 0.00042984014209591475,
|
| 2530 |
+
"loss": 0.4333,
|
| 2531 |
+
"step": 3710
|
| 2532 |
+
},
|
| 2533 |
+
{
|
| 2534 |
+
"epoch": 6.07,
|
| 2535 |
+
"learning_rate": 0.0004280639431616341,
|
| 2536 |
+
"loss": 0.4061,
|
| 2537 |
+
"step": 3720
|
| 2538 |
+
},
|
| 2539 |
+
{
|
| 2540 |
+
"epoch": 6.08,
|
| 2541 |
+
"learning_rate": 0.00042628774422735343,
|
| 2542 |
+
"loss": 0.3993,
|
| 2543 |
+
"step": 3730
|
| 2544 |
+
},
|
| 2545 |
+
{
|
| 2546 |
+
"epoch": 6.1,
|
| 2547 |
+
"learning_rate": 0.0004245115452930728,
|
| 2548 |
+
"loss": 0.3988,
|
| 2549 |
+
"step": 3740
|
| 2550 |
+
},
|
| 2551 |
+
{
|
| 2552 |
+
"epoch": 6.12,
|
| 2553 |
+
"learning_rate": 0.00042273534635879216,
|
| 2554 |
+
"loss": 0.3962,
|
| 2555 |
+
"step": 3750
|
| 2556 |
+
},
|
| 2557 |
+
{
|
| 2558 |
+
"epoch": 6.13,
|
| 2559 |
+
"learning_rate": 0.00042095914742451156,
|
| 2560 |
+
"loss": 0.3863,
|
| 2561 |
+
"step": 3760
|
| 2562 |
+
},
|
| 2563 |
+
{
|
| 2564 |
+
"epoch": 6.15,
|
| 2565 |
+
"learning_rate": 0.0004191829484902309,
|
| 2566 |
+
"loss": 0.4184,
|
| 2567 |
+
"step": 3770
|
| 2568 |
+
},
|
| 2569 |
+
{
|
| 2570 |
+
"epoch": 6.17,
|
| 2571 |
+
"learning_rate": 0.00041740674955595023,
|
| 2572 |
+
"loss": 0.4171,
|
| 2573 |
+
"step": 3780
|
| 2574 |
+
},
|
| 2575 |
+
{
|
| 2576 |
+
"epoch": 6.18,
|
| 2577 |
+
"learning_rate": 0.0004156305506216697,
|
| 2578 |
+
"loss": 0.416,
|
| 2579 |
+
"step": 3790
|
| 2580 |
+
},
|
| 2581 |
+
{
|
| 2582 |
+
"epoch": 6.2,
|
| 2583 |
+
"learning_rate": 0.000413854351687389,
|
| 2584 |
+
"loss": 0.4311,
|
| 2585 |
+
"step": 3800
|
| 2586 |
+
},
|
| 2587 |
+
{
|
| 2588 |
+
"epoch": 6.2,
|
| 2589 |
+
"eval_loss": 0.42171674966812134,
|
| 2590 |
+
"eval_runtime": 13.1531,
|
| 2591 |
+
"eval_samples_per_second": 89.029,
|
| 2592 |
+
"eval_steps_per_second": 11.176,
|
| 2593 |
+
"step": 3800
|
| 2594 |
+
},
|
| 2595 |
+
{
|
| 2596 |
+
"epoch": 6.22,
|
| 2597 |
+
"learning_rate": 0.00041207815275310836,
|
| 2598 |
+
"loss": 0.4266,
|
| 2599 |
+
"step": 3810
|
| 2600 |
+
},
|
| 2601 |
+
{
|
| 2602 |
+
"epoch": 6.23,
|
| 2603 |
+
"learning_rate": 0.00041030195381882775,
|
| 2604 |
+
"loss": 0.3933,
|
| 2605 |
+
"step": 3820
|
| 2606 |
+
},
|
| 2607 |
+
{
|
| 2608 |
+
"epoch": 6.25,
|
| 2609 |
+
"learning_rate": 0.0004085257548845471,
|
| 2610 |
+
"loss": 0.4154,
|
| 2611 |
+
"step": 3830
|
| 2612 |
+
},
|
| 2613 |
+
{
|
| 2614 |
+
"epoch": 6.26,
|
| 2615 |
+
"learning_rate": 0.00040674955595026643,
|
| 2616 |
+
"loss": 0.4321,
|
| 2617 |
+
"step": 3840
|
| 2618 |
+
},
|
| 2619 |
+
{
|
| 2620 |
+
"epoch": 6.28,
|
| 2621 |
+
"learning_rate": 0.0004049733570159858,
|
| 2622 |
+
"loss": 0.4338,
|
| 2623 |
+
"step": 3850
|
| 2624 |
+
},
|
| 2625 |
+
{
|
| 2626 |
+
"epoch": 6.3,
|
| 2627 |
+
"learning_rate": 0.00040319715808170517,
|
| 2628 |
+
"loss": 0.4204,
|
| 2629 |
+
"step": 3860
|
| 2630 |
+
},
|
| 2631 |
+
{
|
| 2632 |
+
"epoch": 6.31,
|
| 2633 |
+
"learning_rate": 0.00040142095914742456,
|
| 2634 |
+
"loss": 0.4274,
|
| 2635 |
+
"step": 3870
|
| 2636 |
+
},
|
| 2637 |
+
{
|
| 2638 |
+
"epoch": 6.33,
|
| 2639 |
+
"learning_rate": 0.0003996447602131439,
|
| 2640 |
+
"loss": 0.3985,
|
| 2641 |
+
"step": 3880
|
| 2642 |
+
},
|
| 2643 |
+
{
|
| 2644 |
+
"epoch": 6.35,
|
| 2645 |
+
"learning_rate": 0.00039786856127886324,
|
| 2646 |
+
"loss": 0.4232,
|
| 2647 |
+
"step": 3890
|
| 2648 |
+
},
|
| 2649 |
+
{
|
| 2650 |
+
"epoch": 6.36,
|
| 2651 |
+
"learning_rate": 0.00039609236234458263,
|
| 2652 |
+
"loss": 0.3799,
|
| 2653 |
+
"step": 3900
|
| 2654 |
+
},
|
| 2655 |
+
{
|
| 2656 |
+
"epoch": 6.36,
|
| 2657 |
+
"eval_loss": 0.4199593961238861,
|
| 2658 |
+
"eval_runtime": 12.9106,
|
| 2659 |
+
"eval_samples_per_second": 90.7,
|
| 2660 |
+
"eval_steps_per_second": 11.386,
|
| 2661 |
+
"step": 3900
|
| 2662 |
+
},
|
| 2663 |
+
{
|
| 2664 |
+
"epoch": 6.38,
|
| 2665 |
+
"learning_rate": 0.00039431616341030197,
|
| 2666 |
+
"loss": 0.4061,
|
| 2667 |
+
"step": 3910
|
| 2668 |
+
},
|
| 2669 |
+
{
|
| 2670 |
+
"epoch": 6.39,
|
| 2671 |
+
"learning_rate": 0.0003925399644760213,
|
| 2672 |
+
"loss": 0.398,
|
| 2673 |
+
"step": 3920
|
| 2674 |
+
},
|
| 2675 |
+
{
|
| 2676 |
+
"epoch": 6.41,
|
| 2677 |
+
"learning_rate": 0.0003907637655417407,
|
| 2678 |
+
"loss": 0.3987,
|
| 2679 |
+
"step": 3930
|
| 2680 |
+
},
|
| 2681 |
+
{
|
| 2682 |
+
"epoch": 6.43,
|
| 2683 |
+
"learning_rate": 0.00038898756660746004,
|
| 2684 |
+
"loss": 0.3811,
|
| 2685 |
+
"step": 3940
|
| 2686 |
+
},
|
| 2687 |
+
{
|
| 2688 |
+
"epoch": 6.44,
|
| 2689 |
+
"learning_rate": 0.0003872113676731794,
|
| 2690 |
+
"loss": 0.4133,
|
| 2691 |
+
"step": 3950
|
| 2692 |
+
},
|
| 2693 |
+
{
|
| 2694 |
+
"epoch": 6.46,
|
| 2695 |
+
"learning_rate": 0.0003854351687388988,
|
| 2696 |
+
"loss": 0.4053,
|
| 2697 |
+
"step": 3960
|
| 2698 |
+
},
|
| 2699 |
+
{
|
| 2700 |
+
"epoch": 6.48,
|
| 2701 |
+
"learning_rate": 0.0003836589698046181,
|
| 2702 |
+
"loss": 0.4046,
|
| 2703 |
+
"step": 3970
|
| 2704 |
+
},
|
| 2705 |
+
{
|
| 2706 |
+
"epoch": 6.49,
|
| 2707 |
+
"learning_rate": 0.00038188277087033745,
|
| 2708 |
+
"loss": 0.3875,
|
| 2709 |
+
"step": 3980
|
| 2710 |
+
},
|
| 2711 |
+
{
|
| 2712 |
+
"epoch": 6.51,
|
| 2713 |
+
"learning_rate": 0.00038010657193605685,
|
| 2714 |
+
"loss": 0.4017,
|
| 2715 |
+
"step": 3990
|
| 2716 |
+
},
|
| 2717 |
+
{
|
| 2718 |
+
"epoch": 6.53,
|
| 2719 |
+
"learning_rate": 0.0003783303730017762,
|
| 2720 |
+
"loss": 0.4281,
|
| 2721 |
+
"step": 4000
|
| 2722 |
+
},
|
| 2723 |
+
{
|
| 2724 |
+
"epoch": 6.53,
|
| 2725 |
+
"eval_loss": 0.42139747738838196,
|
| 2726 |
+
"eval_runtime": 13.202,
|
| 2727 |
+
"eval_samples_per_second": 88.699,
|
| 2728 |
+
"eval_steps_per_second": 11.135,
|
| 2729 |
+
"step": 4000
|
| 2730 |
}
|
| 2731 |
],
|
| 2732 |
"logging_steps": 10,
|
| 2733 |
"max_steps": 6130,
|
| 2734 |
"num_train_epochs": 10,
|
| 2735 |
"save_steps": 500,
|
| 2736 |
+
"total_flos": 7559248409395200.0,
|
| 2737 |
"trial_name": null,
|
| 2738 |
"trial_params": null
|
| 2739 |
}
|