Instructions to use Tohrumi/MistralAI_iwslt15_en_vi_manual with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Tohrumi/MistralAI_iwslt15_en_vi_manual with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("unsloth/mistral-7b-bnb-4bit")
model = PeftModel.from_pretrained(base_model, "Tohrumi/MistralAI_iwslt15_en_vi_manual")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- Unsloth Studio
How to use Tohrumi/MistralAI_iwslt15_en_vi_manual with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
```sh
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Tohrumi/MistralAI_iwslt15_en_vi_manual to start chatting
```
Install Unsloth Studio (Windows)
```powershell
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Tohrumi/MistralAI_iwslt15_en_vi_manual to start chatting
```
Using HuggingFace Spaces for Unsloth
```sh
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Tohrumi/MistralAI_iwslt15_en_vi_manual to start chatting
```
Load model with FastModel
```sh
pip install unsloth
```
```python
from unsloth import FastModel

model, tokenizer = FastModel.from_pretrained(
    model_name="Tohrumi/MistralAI_iwslt15_en_vi_manual",
    max_seq_length=2048,
)
```
| [ | |
| { | |
| "loss": 1.2057, | |
| "grad_norm": 1.1315475702285767, | |
| "learning_rate": 0.00019992790844372354, | |
| "epoch": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "loss": 1.1553, | |
| "grad_norm": 1.134278655052185, | |
| "learning_rate": 0.00019985281307260223, | |
| "epoch": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 1.0896, | |
| "grad_norm": 1.4255499839782715, | |
| "learning_rate": 0.00019977771770148087, | |
| "epoch": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "loss": 1.1009, | |
| "grad_norm": 1.0321508646011353, | |
| "learning_rate": 0.00019970262233035956, | |
| "epoch": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "loss": 1.0906, | |
| "grad_norm": 1.065851092338562, | |
| "learning_rate": 0.00019962752695923823, | |
| "epoch": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "loss": 1.2285, | |
| "grad_norm": 1.3161128759384155, | |
| "learning_rate": 0.00019955243158811692, | |
| "epoch": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "loss": 1.2242, | |
| "grad_norm": 1.0888772010803223, | |
| "learning_rate": 0.0001994773362169956, | |
| "epoch": 0.0, | |
| "step": 175 | |
| }, | |
| { | |
| "loss": 1.1169, | |
| "grad_norm": 1.3239021301269531, | |
| "learning_rate": 0.00019940224084587428, | |
| "epoch": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "loss": 1.2012, | |
| "grad_norm": 1.0381895303726196, | |
| "learning_rate": 0.00019932714547475294, | |
| "epoch": 0.0, | |
| "step": 225 | |
| }, | |
| { | |
| "loss": 1.1967, | |
| "grad_norm": 1.1826602220535278, | |
| "learning_rate": 0.0001992520501036316, | |
| "epoch": 0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "loss": 1.1632, | |
| "grad_norm": 1.3360055685043335, | |
| "learning_rate": 0.0001991769547325103, | |
| "epoch": 0.0, | |
| "step": 275 | |
| }, | |
| { | |
| "loss": 1.1724, | |
| "grad_norm": 1.4393656253814697, | |
| "learning_rate": 0.00019910185936138896, | |
| "epoch": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "loss": 1.1483, | |
| "grad_norm": 0.7849452495574951, | |
| "learning_rate": 0.00019902676399026766, | |
| "epoch": 0.0, | |
| "step": 325 | |
| }, | |
| { | |
| "loss": 1.1145, | |
| "grad_norm": 1.1918078660964966, | |
| "learning_rate": 0.00019895166861914635, | |
| "epoch": 0.01, | |
| "step": 350 | |
| }, | |
| { | |
| "loss": 1.1695, | |
| "grad_norm": 0.9958588480949402, | |
| "learning_rate": 0.000198876573248025, | |
| "epoch": 0.01, | |
| "step": 375 | |
| }, | |
| { | |
| "loss": 1.1884, | |
| "grad_norm": 1.6034517288208008, | |
| "learning_rate": 0.00019880147787690368, | |
| "epoch": 0.01, | |
| "step": 400 | |
| }, | |
| { | |
| "loss": 1.1034, | |
| "grad_norm": 1.0951026678085327, | |
| "learning_rate": 0.00019872638250578235, | |
| "epoch": 0.01, | |
| "step": 425 | |
| }, | |
| { | |
| "loss": 1.1748, | |
| "grad_norm": 1.3773316144943237, | |
| "learning_rate": 0.00019865128713466104, | |
| "epoch": 0.01, | |
| "step": 450 | |
| }, | |
| { | |
| "loss": 1.1022, | |
| "grad_norm": 0.9198249578475952, | |
| "learning_rate": 0.0001985761917635397, | |
| "epoch": 0.01, | |
| "step": 475 | |
| }, | |
| { | |
| "loss": 1.1509, | |
| "grad_norm": 1.4825350046157837, | |
| "learning_rate": 0.0001985010963924184, | |
| "epoch": 0.01, | |
| "step": 500 | |
| }, | |
| { | |
| "loss": 1.1985, | |
| "grad_norm": 1.1909708976745605, | |
| "learning_rate": 0.00019842600102129706, | |
| "epoch": 0.01, | |
| "step": 525 | |
| }, | |
| { | |
| "loss": 1.2119, | |
| "grad_norm": 1.6249839067459106, | |
| "learning_rate": 0.00019835090565017573, | |
| "epoch": 0.01, | |
| "step": 550 | |
| }, | |
| { | |
| "loss": 1.1233, | |
| "grad_norm": 1.279699444770813, | |
| "learning_rate": 0.00019827581027905442, | |
| "epoch": 0.01, | |
| "step": 575 | |
| }, | |
| { | |
| "loss": 1.2015, | |
| "grad_norm": 1.2097326517105103, | |
| "learning_rate": 0.00019820071490793308, | |
| "epoch": 0.01, | |
| "step": 600 | |
| }, | |
| { | |
| "loss": 1.0881, | |
| "grad_norm": 1.3173500299453735, | |
| "learning_rate": 0.00019812561953681178, | |
| "epoch": 0.01, | |
| "step": 625 | |
| }, | |
| { | |
| "loss": 1.1941, | |
| "grad_norm": 1.4401942491531372, | |
| "learning_rate": 0.00019805052416569044, | |
| "epoch": 0.01, | |
| "step": 650 | |
| }, | |
| { | |
| "loss": 1.1371, | |
| "grad_norm": 1.4831886291503906, | |
| "learning_rate": 0.0001979754287945691, | |
| "epoch": 0.01, | |
| "step": 675 | |
| }, | |
| { | |
| "loss": 1.1284, | |
| "grad_norm": 1.3925830125808716, | |
| "learning_rate": 0.00019790033342344777, | |
| "epoch": 0.01, | |
| "step": 700 | |
| }, | |
| { | |
| "loss": 1.151, | |
| "grad_norm": 1.1581556797027588, | |
| "learning_rate": 0.00019782523805232647, | |
| "epoch": 0.01, | |
| "step": 725 | |
| }, | |
| { | |
| "loss": 1.1213, | |
| "grad_norm": 0.9816817045211792, | |
| "learning_rate": 0.00019775014268120513, | |
| "epoch": 0.01, | |
| "step": 750 | |
| }, | |
| { | |
| "loss": 1.189, | |
| "grad_norm": 1.771814227104187, | |
| "learning_rate": 0.00019767504731008382, | |
| "epoch": 0.01, | |
| "step": 775 | |
| }, | |
| { | |
| "loss": 1.1613, | |
| "grad_norm": 1.2354599237442017, | |
| "learning_rate": 0.00019759995193896252, | |
| "epoch": 0.01, | |
| "step": 800 | |
| }, | |
| { | |
| "loss": 1.1801, | |
| "grad_norm": 1.1712677478790283, | |
| "learning_rate": 0.00019752485656784115, | |
| "epoch": 0.01, | |
| "step": 825 | |
| }, | |
| { | |
| "loss": 1.1177, | |
| "grad_norm": 1.372815489768982, | |
| "learning_rate": 0.00019744976119671985, | |
| "epoch": 0.01, | |
| "step": 850 | |
| }, | |
| { | |
| "loss": 1.2127, | |
| "grad_norm": 1.7617968320846558, | |
| "learning_rate": 0.0001973746658255985, | |
| "epoch": 0.01, | |
| "step": 875 | |
| }, | |
| { | |
| "loss": 1.1913, | |
| "grad_norm": 1.5143325328826904, | |
| "learning_rate": 0.0001972995704544772, | |
| "epoch": 0.01, | |
| "step": 900 | |
| }, | |
| { | |
| "loss": 1.1401, | |
| "grad_norm": 1.9220173358917236, | |
| "learning_rate": 0.00019722447508335587, | |
| "epoch": 0.01, | |
| "step": 925 | |
| }, | |
| { | |
| "loss": 1.209, | |
| "grad_norm": 1.1575367450714111, | |
| "learning_rate": 0.0001971523835270794, | |
| "epoch": 0.01, | |
| "step": 950 | |
| }, | |
| { | |
| "loss": 1.1926, | |
| "grad_norm": 0.950126051902771, | |
| "learning_rate": 0.00019707728815595806, | |
| "epoch": 0.01, | |
| "step": 975 | |
| }, | |
| { | |
| "loss": 1.1288, | |
| "grad_norm": 1.4479137659072876, | |
| "learning_rate": 0.00019700219278483676, | |
| "epoch": 0.02, | |
| "step": 1000 | |
| }, | |
| { | |
| "loss": 1.1002, | |
| "grad_norm": 1.2142207622528076, | |
| "learning_rate": 0.00019692709741371542, | |
| "epoch": 0.02, | |
| "step": 1025 | |
| }, | |
| { | |
| "loss": 1.1094, | |
| "grad_norm": 1.5564340353012085, | |
| "learning_rate": 0.0001968520020425941, | |
| "epoch": 0.02, | |
| "step": 1050 | |
| }, | |
| { | |
| "loss": 1.1565, | |
| "grad_norm": 0.9252703189849854, | |
| "learning_rate": 0.00019677690667147278, | |
| "epoch": 0.02, | |
| "step": 1075 | |
| }, | |
| { | |
| "loss": 1.1714, | |
| "grad_norm": 1.2928420305252075, | |
| "learning_rate": 0.00019670181130035147, | |
| "epoch": 0.02, | |
| "step": 1100 | |
| }, | |
| { | |
| "loss": 1.1822, | |
| "grad_norm": 6.4705281257629395, | |
| "learning_rate": 0.0001966267159292301, | |
| "epoch": 0.02, | |
| "step": 1125 | |
| }, | |
| { | |
| "loss": 1.1806, | |
| "grad_norm": 1.6441676616668701, | |
| "learning_rate": 0.0001965516205581088, | |
| "epoch": 0.02, | |
| "step": 1150 | |
| }, | |
| { | |
| "loss": 1.1523, | |
| "grad_norm": 1.981781244277954, | |
| "learning_rate": 0.0001964765251869875, | |
| "epoch": 0.02, | |
| "step": 1175 | |
| }, | |
| { | |
| "loss": 1.1556, | |
| "grad_norm": 1.6746410131454468, | |
| "learning_rate": 0.00019640142981586616, | |
| "epoch": 0.02, | |
| "step": 1200 | |
| }, | |
| { | |
| "loss": 1.1145, | |
| "grad_norm": 1.4516457319259644, | |
| "learning_rate": 0.00019632633444474485, | |
| "epoch": 0.02, | |
| "step": 1225 | |
| }, | |
| { | |
| "loss": 1.1196, | |
| "grad_norm": 1.1103088855743408, | |
| "learning_rate": 0.00019625123907362352, | |
| "epoch": 0.02, | |
| "step": 1250 | |
| }, | |
| { | |
| "loss": 1.1693, | |
| "grad_norm": 2.238402843475342, | |
| "learning_rate": 0.00019617614370250218, | |
| "epoch": 0.02, | |
| "step": 1275 | |
| }, | |
| { | |
| "loss": 1.1292, | |
| "grad_norm": 1.3010998964309692, | |
| "learning_rate": 0.00019610104833138085, | |
| "epoch": 0.02, | |
| "step": 1300 | |
| }, | |
| { | |
| "loss": 1.1058, | |
| "grad_norm": 1.2125681638717651, | |
| "learning_rate": 0.00019602595296025954, | |
| "epoch": 0.02, | |
| "step": 1325 | |
| }, | |
| { | |
| "loss": 1.2092, | |
| "grad_norm": 1.263877272605896, | |
| "learning_rate": 0.00019595085758913823, | |
| "epoch": 0.02, | |
| "step": 1350 | |
| }, | |
| { | |
| "loss": 1.1451, | |
| "grad_norm": 1.3143881559371948, | |
| "learning_rate": 0.0001958757622180169, | |
| "epoch": 0.02, | |
| "step": 1375 | |
| }, | |
| { | |
| "loss": 1.1226, | |
| "grad_norm": 1.081262230873108, | |
| "learning_rate": 0.00019580066684689556, | |
| "epoch": 0.02, | |
| "step": 1400 | |
| }, | |
| { | |
| "loss": 1.1559, | |
| "grad_norm": 1.4016741514205933, | |
| "learning_rate": 0.00019572557147577423, | |
| "epoch": 0.02, | |
| "step": 1425 | |
| }, | |
| { | |
| "loss": 1.1568, | |
| "grad_norm": 1.435803771018982, | |
| "learning_rate": 0.00019565047610465292, | |
| "epoch": 0.02, | |
| "step": 1450 | |
| }, | |
| { | |
| "loss": 1.1661, | |
| "grad_norm": 0.9757218956947327, | |
| "learning_rate": 0.00019557538073353159, | |
| "epoch": 0.02, | |
| "step": 1475 | |
| }, | |
| { | |
| "loss": 1.1674, | |
| "grad_norm": 1.1646333932876587, | |
| "learning_rate": 0.00019550028536241028, | |
| "epoch": 0.02, | |
| "step": 1500 | |
| }, | |
| { | |
| "loss": 1.229, | |
| "grad_norm": 1.707133412361145, | |
| "learning_rate": 0.00019542518999128894, | |
| "epoch": 0.02, | |
| "step": 1525 | |
| }, | |
| { | |
| "loss": 1.3046, | |
| "grad_norm": 1.5240947008132935, | |
| "learning_rate": 0.00019535009462016764, | |
| "epoch": 0.02, | |
| "step": 1550 | |
| }, | |
| { | |
| "loss": 1.3126, | |
| "grad_norm": 1.0018959045410156, | |
| "learning_rate": 0.0001952749992490463, | |
| "epoch": 0.02, | |
| "step": 1575 | |
| }, | |
| { | |
| "loss": 1.2822, | |
| "grad_norm": 1.017600655555725, | |
| "learning_rate": 0.00019519990387792497, | |
| "epoch": 0.02, | |
| "step": 1600 | |
| }, | |
| { | |
| "loss": 1.339, | |
| "grad_norm": 1.2864093780517578, | |
| "learning_rate": 0.00019512480850680366, | |
| "epoch": 0.02, | |
| "step": 1625 | |
| }, | |
| { | |
| "loss": 1.277, | |
| "grad_norm": 1.2390028238296509, | |
| "learning_rate": 0.00019504971313568232, | |
| "epoch": 0.02, | |
| "step": 1650 | |
| }, | |
| { | |
| "loss": 1.2122, | |
| "grad_norm": 1.8026188611984253, | |
| "learning_rate": 0.00019497461776456102, | |
| "epoch": 0.03, | |
| "step": 1675 | |
| }, | |
| { | |
| "loss": 1.3177, | |
| "grad_norm": 1.143813967704773, | |
| "learning_rate": 0.00019489952239343968, | |
| "epoch": 0.03, | |
| "step": 1700 | |
| }, | |
| { | |
| "loss": 1.3457, | |
| "grad_norm": 1.5295052528381348, | |
| "learning_rate": 0.00019482442702231835, | |
| "epoch": 0.03, | |
| "step": 1725 | |
| }, | |
| { | |
| "loss": 1.2132, | |
| "grad_norm": 1.7378249168395996, | |
| "learning_rate": 0.000194749331651197, | |
| "epoch": 0.03, | |
| "step": 1750 | |
| }, | |
| { | |
| "loss": 1.2284, | |
| "grad_norm": 2.3345251083374023, | |
| "learning_rate": 0.0001946742362800757, | |
| "epoch": 0.03, | |
| "step": 1775 | |
| }, | |
| { | |
| "loss": 1.3233, | |
| "grad_norm": 2.047725200653076, | |
| "learning_rate": 0.0001945991409089544, | |
| "epoch": 0.03, | |
| "step": 1800 | |
| }, | |
| { | |
| "loss": 1.2398, | |
| "grad_norm": 1.7765051126480103, | |
| "learning_rate": 0.00019452404553783306, | |
| "epoch": 0.03, | |
| "step": 1825 | |
| }, | |
| { | |
| "loss": 1.226, | |
| "grad_norm": 1.4397103786468506, | |
| "learning_rate": 0.00019444895016671173, | |
| "epoch": 0.03, | |
| "step": 1850 | |
| }, | |
| { | |
| "loss": 1.3669, | |
| "grad_norm": 1.741700530052185, | |
| "learning_rate": 0.0001943738547955904, | |
| "epoch": 0.03, | |
| "step": 1875 | |
| }, | |
| { | |
| "loss": 1.2807, | |
| "grad_norm": 1.4095584154129028, | |
| "learning_rate": 0.00019429875942446909, | |
| "epoch": 0.03, | |
| "step": 1900 | |
| }, | |
| { | |
| "loss": 1.1971, | |
| "grad_norm": 1.1434788703918457, | |
| "learning_rate": 0.00019422366405334775, | |
| "epoch": 0.03, | |
| "step": 1925 | |
| }, | |
| { | |
| "loss": 1.3073, | |
| "grad_norm": 1.1965715885162354, | |
| "learning_rate": 0.00019414856868222644, | |
| "epoch": 0.03, | |
| "step": 1950 | |
| }, | |
| { | |
| "loss": 1.1621, | |
| "grad_norm": 1.6625946760177612, | |
| "learning_rate": 0.0001940734733111051, | |
| "epoch": 0.03, | |
| "step": 1975 | |
| }, | |
| { | |
| "loss": 1.2672, | |
| "grad_norm": 1.454901933670044, | |
| "learning_rate": 0.0001939983779399838, | |
| "epoch": 0.03, | |
| "step": 2000 | |
| }, | |
| { | |
| "loss": 1.2422, | |
| "grad_norm": 1.198033094406128, | |
| "learning_rate": 0.00019392328256886247, | |
| "epoch": 0.03, | |
| "step": 2025 | |
| }, | |
| { | |
| "loss": 1.1992, | |
| "grad_norm": 1.9674854278564453, | |
| "learning_rate": 0.00019384818719774113, | |
| "epoch": 0.03, | |
| "step": 2050 | |
| }, | |
| { | |
| "loss": 1.2854, | |
| "grad_norm": 1.4203040599822998, | |
| "learning_rate": 0.00019377309182661982, | |
| "epoch": 0.03, | |
| "step": 2075 | |
| }, | |
| { | |
| "loss": 1.2896, | |
| "grad_norm": 2.059704065322876, | |
| "learning_rate": 0.0001936979964554985, | |
| "epoch": 0.03, | |
| "step": 2100 | |
| }, | |
| { | |
| "loss": 1.2304, | |
| "grad_norm": 3.035451889038086, | |
| "learning_rate": 0.00019362290108437718, | |
| "epoch": 0.03, | |
| "step": 2125 | |
| }, | |
| { | |
| "loss": 1.2555, | |
| "grad_norm": 0.9351598620414734, | |
| "learning_rate": 0.00019354780571325585, | |
| "epoch": 0.03, | |
| "step": 2150 | |
| }, | |
| { | |
| "loss": 1.2276, | |
| "grad_norm": 1.0075334310531616, | |
| "learning_rate": 0.0001934727103421345, | |
| "epoch": 0.03, | |
| "step": 2175 | |
| }, | |
| { | |
| "loss": 1.2042, | |
| "grad_norm": 1.5209815502166748, | |
| "learning_rate": 0.00019339761497101318, | |
| "epoch": 0.03, | |
| "step": 2200 | |
| }, | |
| { | |
| "loss": 1.2262, | |
| "grad_norm": 0.9942296147346497, | |
| "learning_rate": 0.00019332251959989187, | |
| "epoch": 0.03, | |
| "step": 2225 | |
| }, | |
| { | |
| "loss": 1.2108, | |
| "grad_norm": 1.755386233329773, | |
| "learning_rate": 0.00019324742422877056, | |
| "epoch": 0.03, | |
| "step": 2250 | |
| }, | |
| { | |
| "loss": 1.2354, | |
| "grad_norm": 1.129966378211975, | |
| "learning_rate": 0.00019317232885764923, | |
| "epoch": 0.03, | |
| "step": 2275 | |
| }, | |
| { | |
| "loss": 1.1907, | |
| "grad_norm": 0.871713399887085, | |
| "learning_rate": 0.0001930972334865279, | |
| "epoch": 0.03, | |
| "step": 2300 | |
| }, | |
| { | |
| "loss": 1.2465, | |
| "grad_norm": 1.4679888486862183, | |
| "learning_rate": 0.00019302213811540656, | |
| "epoch": 0.03, | |
| "step": 2325 | |
| }, | |
| { | |
| "loss": 1.2956, | |
| "grad_norm": 1.5835942029953003, | |
| "learning_rate": 0.00019294704274428525, | |
| "epoch": 0.04, | |
| "step": 2350 | |
| }, | |
| { | |
| "loss": 1.238, | |
| "grad_norm": 1.7425931692123413, | |
| "learning_rate": 0.00019287194737316392, | |
| "epoch": 0.04, | |
| "step": 2375 | |
| }, | |
| { | |
| "loss": 1.3505, | |
| "grad_norm": 0.9919810891151428, | |
| "learning_rate": 0.0001927968520020426, | |
| "epoch": 0.04, | |
| "step": 2400 | |
| }, | |
| { | |
| "loss": 1.3327, | |
| "grad_norm": 1.3356170654296875, | |
| "learning_rate": 0.00019272175663092127, | |
| "epoch": 0.04, | |
| "step": 2425 | |
| }, | |
| { | |
| "loss": 1.2885, | |
| "grad_norm": 0.9927514791488647, | |
| "learning_rate": 0.00019264666125979997, | |
| "epoch": 0.04, | |
| "step": 2450 | |
| }, | |
| { | |
| "loss": 1.245, | |
| "grad_norm": 1.3974716663360596, | |
| "learning_rate": 0.00019257156588867863, | |
| "epoch": 0.04, | |
| "step": 2475 | |
| }, | |
| { | |
| "loss": 1.2714, | |
| "grad_norm": 1.3735284805297852, | |
| "learning_rate": 0.0001924964705175573, | |
| "epoch": 0.04, | |
| "step": 2500 | |
| }, | |
| { | |
| "loss": 1.2481, | |
| "grad_norm": 1.096691370010376, | |
| "learning_rate": 0.000192421375146436, | |
| "epoch": 0.04, | |
| "step": 2525 | |
| }, | |
| { | |
| "loss": 1.2765, | |
| "grad_norm": 1.1713175773620605, | |
| "learning_rate": 0.00019234928359015952, | |
| "epoch": 0.04, | |
| "step": 2550 | |
| }, | |
| { | |
| "loss": 1.1508, | |
| "grad_norm": 1.2733495235443115, | |
| "learning_rate": 0.0001922741882190382, | |
| "epoch": 0.04, | |
| "step": 2575 | |
| }, | |
| { | |
| "loss": 1.2932, | |
| "grad_norm": 0.9217672944068909, | |
| "learning_rate": 0.00019219909284791685, | |
| "epoch": 0.04, | |
| "step": 2600 | |
| }, | |
| { | |
| "loss": 1.2598, | |
| "grad_norm": 2.117608070373535, | |
| "learning_rate": 0.00019212399747679554, | |
| "epoch": 0.04, | |
| "step": 2625 | |
| }, | |
| { | |
| "loss": 1.3049, | |
| "grad_norm": 1.0736849308013916, | |
| "learning_rate": 0.0001920489021056742, | |
| "epoch": 0.04, | |
| "step": 2650 | |
| }, | |
| { | |
| "loss": 1.2122, | |
| "grad_norm": 0.9429724812507629, | |
| "learning_rate": 0.0001919738067345529, | |
| "epoch": 0.04, | |
| "step": 2675 | |
| }, | |
| { | |
| "loss": 1.2863, | |
| "grad_norm": 1.8061331510543823, | |
| "learning_rate": 0.00019189871136343156, | |
| "epoch": 0.04, | |
| "step": 2700 | |
| }, | |
| { | |
| "loss": 1.2269, | |
| "grad_norm": 1.2970006465911865, | |
| "learning_rate": 0.00019182361599231026, | |
| "epoch": 0.04, | |
| "step": 2725 | |
| }, | |
| { | |
| "loss": 1.2015, | |
| "grad_norm": 1.2956987619400024, | |
| "learning_rate": 0.00019174852062118892, | |
| "epoch": 0.04, | |
| "step": 2750 | |
| }, | |
| { | |
| "loss": 1.2501, | |
| "grad_norm": 2.2317731380462646, | |
| "learning_rate": 0.0001916734252500676, | |
| "epoch": 0.04, | |
| "step": 2775 | |
| }, | |
| { | |
| "loss": 1.256, | |
| "grad_norm": 1.3463131189346313, | |
| "learning_rate": 0.00019159832987894628, | |
| "epoch": 0.04, | |
| "step": 2800 | |
| }, | |
| { | |
| "loss": 1.2649, | |
| "grad_norm": 1.146892786026001, | |
| "learning_rate": 0.00019152323450782495, | |
| "epoch": 0.04, | |
| "step": 2825 | |
| }, | |
| { | |
| "loss": 1.3137, | |
| "grad_norm": 1.524172306060791, | |
| "learning_rate": 0.00019144813913670364, | |
| "epoch": 0.04, | |
| "step": 2850 | |
| }, | |
| { | |
| "loss": 1.2658, | |
| "grad_norm": 1.0975860357284546, | |
| "learning_rate": 0.0001913730437655823, | |
| "epoch": 0.04, | |
| "step": 2875 | |
| }, | |
| { | |
| "loss": 1.2279, | |
| "grad_norm": 1.350852608680725, | |
| "learning_rate": 0.00019129794839446097, | |
| "epoch": 0.04, | |
| "step": 2900 | |
| }, | |
| { | |
| "loss": 1.2253, | |
| "grad_norm": 1.8295092582702637, | |
| "learning_rate": 0.00019122285302333963, | |
| "epoch": 0.04, | |
| "step": 2925 | |
| }, | |
| { | |
| "loss": 1.3226, | |
| "grad_norm": 2.276642322540283, | |
| "learning_rate": 0.00019114775765221833, | |
| "epoch": 0.04, | |
| "step": 2950 | |
| }, | |
| { | |
| "loss": 1.2016, | |
| "grad_norm": 1.5693854093551636, | |
| "learning_rate": 0.000191072662281097, | |
| "epoch": 0.04, | |
| "step": 2975 | |
| }, | |
| { | |
| "loss": 1.1537, | |
| "grad_norm": 1.3827277421951294, | |
| "learning_rate": 0.00019099756690997568, | |
| "epoch": 0.05, | |
| "step": 3000 | |
| }, | |
| { | |
| "loss": 1.3013, | |
| "grad_norm": 1.0224173069000244, | |
| "learning_rate": 0.00019092247153885438, | |
| "epoch": 0.05, | |
| "step": 3025 | |
| }, | |
| { | |
| "loss": 1.248, | |
| "grad_norm": 1.8332293033599854, | |
| "learning_rate": 0.00019084737616773301, | |
| "epoch": 0.05, | |
| "step": 3050 | |
| }, | |
| { | |
| "loss": 1.2415, | |
| "grad_norm": 0.9008692502975464, | |
| "learning_rate": 0.0001907722807966117, | |
| "epoch": 0.05, | |
| "step": 3075 | |
| }, | |
| { | |
| "loss": 1.2225, | |
| "grad_norm": 1.5711129903793335, | |
| "learning_rate": 0.00019069718542549037, | |
| "epoch": 0.05, | |
| "step": 3100 | |
| }, | |
| { | |
| "loss": 1.197, | |
| "grad_norm": 2.6612651348114014, | |
| "learning_rate": 0.00019062209005436906, | |
| "epoch": 0.05, | |
| "step": 3125 | |
| }, | |
| { | |
| "loss": 1.2788, | |
| "grad_norm": 1.3320034742355347, | |
| "learning_rate": 0.00019054699468324773, | |
| "epoch": 0.05, | |
| "step": 3150 | |
| }, | |
| { | |
| "loss": 1.3182, | |
| "grad_norm": 1.8735719919204712, | |
| "learning_rate": 0.00019047189931212642, | |
| "epoch": 0.05, | |
| "step": 3175 | |
| }, | |
| { | |
| "loss": 1.2997, | |
| "grad_norm": 1.550970435142517, | |
| "learning_rate": 0.0001903968039410051, | |
| "epoch": 0.05, | |
| "step": 3200 | |
| }, | |
| { | |
| "loss": 1.2142, | |
| "grad_norm": 1.3348615169525146, | |
| "learning_rate": 0.00019032170856988375, | |
| "epoch": 0.05, | |
| "step": 3225 | |
| }, | |
| { | |
| "loss": 1.1912, | |
| "grad_norm": 1.036444902420044, | |
| "learning_rate": 0.00019024661319876245, | |
| "epoch": 0.05, | |
| "step": 3250 | |
| }, | |
| { | |
| "loss": 1.2365, | |
| "grad_norm": 1.920847773551941, | |
| "learning_rate": 0.0001901715178276411, | |
| "epoch": 0.05, | |
| "step": 3275 | |
| }, | |
| { | |
| "loss": 1.2733, | |
| "grad_norm": 3.4518144130706787, | |
| "learning_rate": 0.0001900964224565198, | |
| "epoch": 0.05, | |
| "step": 3300 | |
| }, | |
| { | |
| "loss": 1.1872, | |
| "grad_norm": 2.0837206840515137, | |
| "learning_rate": 0.00019002132708539847, | |
| "epoch": 0.05, | |
| "step": 3325 | |
| }, | |
| { | |
| "loss": 1.2276, | |
| "grad_norm": 1.4272059202194214, | |
| "learning_rate": 0.00018994623171427713, | |
| "epoch": 0.05, | |
| "step": 3350 | |
| }, | |
| { | |
| "loss": 1.2622, | |
| "grad_norm": 1.0555847883224487, | |
| "learning_rate": 0.0001898711363431558, | |
| "epoch": 0.05, | |
| "step": 3375 | |
| }, | |
| { | |
| "loss": 1.2896, | |
| "grad_norm": 0.9901136755943298, | |
| "learning_rate": 0.0001897960409720345, | |
| "epoch": 0.05, | |
| "step": 3400 | |
| }, | |
| { | |
| "loss": 1.2141, | |
| "grad_norm": 2.292473793029785, | |
| "learning_rate": 0.00018972094560091316, | |
| "epoch": 0.05, | |
| "step": 3425 | |
| }, | |
| { | |
| "loss": 1.353, | |
| "grad_norm": 1.7795960903167725, | |
| "learning_rate": 0.00018964585022979185, | |
| "epoch": 0.05, | |
| "step": 3450 | |
| }, | |
| { | |
| "loss": 1.2276, | |
| "grad_norm": 1.4300271272659302, | |
| "learning_rate": 0.00018957075485867054, | |
| "epoch": 0.05, | |
| "step": 3475 | |
| }, | |
| { | |
| "loss": 1.2115, | |
| "grad_norm": 1.8741381168365479, | |
| "learning_rate": 0.00018949565948754918, | |
| "epoch": 0.05, | |
| "step": 3500 | |
| }, | |
| { | |
| "loss": 1.2229, | |
| "grad_norm": 1.2686586380004883, | |
| "learning_rate": 0.00018942056411642787, | |
| "epoch": 0.05, | |
| "step": 3525 | |
| }, | |
| { | |
| "loss": 1.2484, | |
| "grad_norm": 1.245797872543335, | |
| "learning_rate": 0.00018934546874530654, | |
| "epoch": 0.05, | |
| "step": 3550 | |
| }, | |
| { | |
| "loss": 1.2026, | |
| "grad_norm": 1.744106650352478, | |
| "learning_rate": 0.00018927037337418523, | |
| "epoch": 0.05, | |
| "step": 3575 | |
| }, | |
| { | |
| "loss": 1.1809, | |
| "grad_norm": 1.4250385761260986, | |
| "learning_rate": 0.0001891952780030639, | |
| "epoch": 0.05, | |
| "step": 3600 | |
| }, | |
| { | |
| "loss": 1.2408, | |
| "grad_norm": 3.535332202911377, | |
| "learning_rate": 0.0001891201826319426, | |
| "epoch": 0.05, | |
| "step": 3625 | |
| }, | |
| { | |
| "loss": 1.3137, | |
| "grad_norm": 1.612424612045288, | |
| "learning_rate": 0.00018904508726082125, | |
| "epoch": 0.05, | |
| "step": 3650 | |
| }, | |
| { | |
| "loss": 1.2419, | |
| "grad_norm": 2.110978841781616, | |
| "learning_rate": 0.00018896999188969992, | |
| "epoch": 0.06, | |
| "step": 3675 | |
| }, | |
| { | |
| "loss": 1.2782, | |
| "grad_norm": 1.3754994869232178, | |
| "learning_rate": 0.0001888948965185786, | |
| "epoch": 0.06, | |
| "step": 3700 | |
| }, | |
| { | |
| "loss": 1.1582, | |
| "grad_norm": 0.849475085735321, | |
| "learning_rate": 0.00018881980114745728, | |
| "epoch": 0.06, | |
| "step": 3725 | |
| }, | |
| { | |
| "loss": 1.1607, | |
| "grad_norm": 1.4278253316879272, | |
| "learning_rate": 0.00018874470577633597, | |
| "epoch": 0.06, | |
| "step": 3750 | |
| }, | |
| { | |
| "loss": 1.2154, | |
| "grad_norm": 3.2679355144500732, | |
| "learning_rate": 0.00018866961040521463, | |
| "epoch": 0.06, | |
| "step": 3775 | |
| }, | |
| { | |
| "loss": 1.1998, | |
| "grad_norm": 0.9529170393943787, | |
| "learning_rate": 0.0001885945150340933, | |
| "epoch": 0.06, | |
| "step": 3800 | |
| }, | |
| { | |
| "loss": 1.2879, | |
| "grad_norm": 1.243181586265564, | |
| "learning_rate": 0.00018851941966297196, | |
| "epoch": 0.06, | |
| "step": 3825 | |
| }, | |
| { | |
| "loss": 1.2077, | |
| "grad_norm": 1.7659603357315063, | |
| "learning_rate": 0.00018844432429185066, | |
| "epoch": 0.06, | |
| "step": 3850 | |
| }, | |
| { | |
| "loss": 1.2047, | |
| "grad_norm": 1.5011489391326904, | |
| "learning_rate": 0.00018836922892072935, | |
| "epoch": 0.06, | |
| "step": 3875 | |
| }, | |
| { | |
| "loss": 1.2347, | |
| "grad_norm": 1.761047601699829, | |
| "learning_rate": 0.00018829413354960801, | |
| "epoch": 0.06, | |
| "step": 3900 | |
| }, | |
| { | |
| "loss": 1.1974, | |
| "grad_norm": 3.6463253498077393, | |
| "learning_rate": 0.0001882190381784867, | |
| "epoch": 0.06, | |
| "step": 3925 | |
| }, | |
| { | |
| "loss": 1.1995, | |
| "grad_norm": 0.955506443977356, | |
| "learning_rate": 0.00018814394280736537, | |
| "epoch": 0.06, | |
| "step": 3950 | |
| }, | |
| { | |
| "loss": 1.2918, | |
| "grad_norm": 1.0750863552093506, | |
| "learning_rate": 0.00018806884743624404, | |
| "epoch": 0.06, | |
| "step": 3975 | |
| }, | |
| { | |
| "loss": 1.2288, | |
| "grad_norm": 1.1837942600250244, | |
| "learning_rate": 0.0001879937520651227, | |
| "epoch": 0.06, | |
| "step": 4000 | |
| }, | |
| { | |
| "loss": 1.2013, | |
| "grad_norm": 1.5817101001739502, | |
| "learning_rate": 0.0001879186566940014, | |
| "epoch": 0.06, | |
| "step": 4025 | |
| }, | |
| { | |
| "loss": 1.1851, | |
| "grad_norm": 1.1778972148895264, | |
| "learning_rate": 0.00018784356132288006, | |
| "epoch": 0.06, | |
| "step": 4050 | |
| }, | |
| { | |
| "loss": 1.2426, | |
| "grad_norm": 1.3166766166687012, | |
| "learning_rate": 0.00018776846595175875, | |
| "epoch": 0.06, | |
| "step": 4075 | |
| }, | |
| { | |
| "loss": 1.2837, | |
| "grad_norm": 2.850275993347168, | |
| "learning_rate": 0.00018769337058063742, | |
| "epoch": 0.06, | |
| "step": 4100 | |
| }, | |
| { | |
| "loss": 1.2267, | |
| "grad_norm": 1.6599894762039185, | |
| "learning_rate": 0.00018761827520951608, | |
| "epoch": 0.06, | |
| "step": 4125 | |
| }, | |
| { | |
| "loss": 1.2885, | |
| "grad_norm": 1.5684510469436646, | |
| "learning_rate": 0.00018754317983839478, | |
| "epoch": 0.06, | |
| "step": 4150 | |
| }, | |
| { | |
| "loss": 1.2471, | |
| "grad_norm": 2.0227372646331787, | |
| "learning_rate": 0.00018746808446727344, | |
| "epoch": 0.06, | |
| "step": 4175 | |
| }, | |
| { | |
| "loss": 1.2944, | |
| "grad_norm": 1.747456669807434, | |
| "learning_rate": 0.00018739298909615213, | |
| "epoch": 0.06, | |
| "step": 4200 | |
| }, | |
| { | |
| "loss": 1.2535, | |
| "grad_norm": 1.6037201881408691, | |
| "learning_rate": 0.0001873178937250308, | |
| "epoch": 0.06, | |
| "step": 4225 | |
| }, | |
| { | |
| "loss": 1.2354, | |
| "grad_norm": 0.9807868003845215, | |
| "learning_rate": 0.00018724279835390946, | |
| "epoch": 0.06, | |
| "step": 4250 | |
| }, | |
| { | |
| "loss": 1.1942, | |
| "grad_norm": 0.8951900601387024, | |
| "learning_rate": 0.00018716770298278813, | |
| "epoch": 0.06, | |
| "step": 4275 | |
| }, | |
| { | |
| "loss": 1.2506, | |
| "grad_norm": 1.4032387733459473, | |
| "learning_rate": 0.00018709260761166682, | |
| "epoch": 0.06, | |
| "step": 4300 | |
| }, | |
| { | |
| "loss": 1.2388, | |
| "grad_norm": 1.407606840133667, | |
| "learning_rate": 0.00018701751224054551, | |
| "epoch": 0.06, | |
| "step": 4325 | |
| }, | |
| { | |
| "loss": 1.2856, | |
| "grad_norm": 1.153029441833496, | |
| "learning_rate": 0.00018694241686942418, | |
| "epoch": 0.07, | |
| "step": 4350 | |
| }, | |
| { | |
| "loss": 1.2268, | |
| "grad_norm": 1.075202226638794, | |
| "learning_rate": 0.00018686732149830287, | |
| "epoch": 0.07, | |
| "step": 4375 | |
| }, | |
| { | |
| "loss": 1.257, | |
| "grad_norm": 1.2537505626678467, | |
| "learning_rate": 0.00018679222612718154, | |
| "epoch": 0.07, | |
| "step": 4400 | |
| }, | |
| { | |
| "loss": 1.2183, | |
| "grad_norm": 0.9912234544754028, | |
| "learning_rate": 0.0001867171307560602, | |
| "epoch": 0.07, | |
| "step": 4425 | |
| }, | |
| { | |
| "loss": 1.1376, | |
| "grad_norm": 1.3906209468841553, | |
| "learning_rate": 0.00018664203538493887, | |
| "epoch": 0.07, | |
| "step": 4450 | |
| }, | |
| { | |
| "loss": 1.2794, | |
| "grad_norm": 1.3347073793411255, | |
| "learning_rate": 0.00018656694001381756, | |
| "epoch": 0.07, | |
| "step": 4475 | |
| }, | |
| { | |
| "loss": 1.2486, | |
| "grad_norm": 1.259150505065918, | |
| "learning_rate": 0.00018649184464269623, | |
| "epoch": 0.07, | |
| "step": 4500 | |
| }, | |
| { | |
| "loss": 1.2408, | |
| "grad_norm": 1.7800498008728027, | |
| "learning_rate": 0.00018641674927157492, | |
| "epoch": 0.07, | |
| "step": 4525 | |
| }, | |
| { | |
| "loss": 1.282, | |
| "grad_norm": 0.9904906153678894, | |
| "learning_rate": 0.00018634165390045358, | |
| "epoch": 0.07, | |
| "step": 4550 | |
| }, | |
| { | |
| "loss": 1.2944, | |
| "grad_norm": 1.2220566272735596, | |
| "learning_rate": 0.00018626655852933225, | |
| "epoch": 0.07, | |
| "step": 4575 | |
| }, | |
| { | |
| "loss": 1.266, | |
| "grad_norm": 1.4289559125900269, | |
| "learning_rate": 0.00018619146315821094, | |
| "epoch": 0.07, | |
| "step": 4600 | |
| }, | |
| { | |
| "loss": 1.2164, | |
| "grad_norm": 1.5805399417877197, | |
| "learning_rate": 0.0001861163677870896, | |
| "epoch": 0.07, | |
| "step": 4625 | |
| }, | |
| { | |
| "loss": 1.2176, | |
| "grad_norm": 1.2486138343811035, | |
| "learning_rate": 0.0001860412724159683, | |
| "epoch": 0.07, | |
| "step": 4650 | |
| }, | |
| { | |
| "loss": 1.2165, | |
| "grad_norm": 1.4444175958633423, | |
| "learning_rate": 0.00018596617704484696, | |
| "epoch": 0.07, | |
| "step": 4675 | |
| }, | |
| { | |
| "loss": 1.2247, | |
| "grad_norm": 1.6640115976333618, | |
| "learning_rate": 0.00018589108167372563, | |
| "epoch": 0.07, | |
| "step": 4700 | |
| }, | |
| { | |
| "loss": 1.2123, | |
| "grad_norm": 1.1432693004608154, | |
| "learning_rate": 0.0001858159863026043, | |
| "epoch": 0.07, | |
| "step": 4725 | |
| }, | |
| { | |
| "loss": 1.2347, | |
| "grad_norm": 0.9574340581893921, | |
| "learning_rate": 0.000185740890931483, | |
| "epoch": 0.07, | |
| "step": 4750 | |
| }, | |
| { | |
| "loss": 1.2177, | |
| "grad_norm": 1.5829005241394043, | |
| "learning_rate": 0.00018566579556036168, | |
| "epoch": 0.07, | |
| "step": 4775 | |
| }, | |
| { | |
| "loss": 1.2693, | |
| "grad_norm": 1.0968513488769531, | |
| "learning_rate": 0.00018559070018924035, | |
| "epoch": 0.07, | |
| "step": 4800 | |
| }, | |
| { | |
| "loss": 1.2203, | |
| "grad_norm": 1.2009191513061523, | |
| "learning_rate": 0.00018551560481811904, | |
| "epoch": 0.07, | |
| "step": 4825 | |
| }, | |
| { | |
| "loss": 1.2341, | |
| "grad_norm": 1.4881080389022827, | |
| "learning_rate": 0.0001854405094469977, | |
| "epoch": 0.07, | |
| "step": 4850 | |
| }, | |
| { | |
| "loss": 1.1719, | |
| "grad_norm": 1.083778977394104, | |
| "learning_rate": 0.00018536541407587637, | |
| "epoch": 0.07, | |
| "step": 4875 | |
| }, | |
| { | |
| "loss": 1.2091, | |
| "grad_norm": 1.382657766342163, | |
| "learning_rate": 0.00018529031870475503, | |
| "epoch": 0.07, | |
| "step": 4900 | |
| }, | |
| { | |
| "loss": 1.2022, | |
| "grad_norm": 1.0024495124816895, | |
| "learning_rate": 0.00018521522333363373, | |
| "epoch": 0.07, | |
| "step": 4925 | |
| }, | |
| { | |
| "loss": 1.2967, | |
| "grad_norm": 1.2385984659194946, | |
| "learning_rate": 0.0001851401279625124, | |
| "epoch": 0.07, | |
| "step": 4950 | |
| }, | |
| { | |
| "loss": 1.2568, | |
| "grad_norm": 2.7820701599121094, | |
| "learning_rate": 0.00018506503259139108, | |
| "epoch": 0.07, | |
| "step": 4975 | |
| }, | |
| { | |
| "loss": 1.2483, | |
| "grad_norm": 1.5247501134872437, | |
| "learning_rate": 0.00018498993722026975, | |
| "epoch": 0.08, | |
| "step": 5000 | |
| }, | |
| { | |
| "loss": 1.2827, | |
| "grad_norm": 1.855117678642273, | |
| "learning_rate": 0.00018491484184914841, | |
| "epoch": 0.08, | |
| "step": 5025 | |
| }, | |
| { | |
| "loss": 1.2518, | |
| "grad_norm": 1.2518528699874878, | |
| "learning_rate": 0.0001848397464780271, | |
| "epoch": 0.08, | |
| "step": 5050 | |
| }, | |
| { | |
| "loss": 1.2962, | |
| "grad_norm": 1.2159770727157593, | |
| "learning_rate": 0.00018476465110690577, | |
| "epoch": 0.08, | |
| "step": 5075 | |
| }, | |
| { | |
| "loss": 1.2611, | |
| "grad_norm": 1.3085296154022217, | |
| "learning_rate": 0.00018468955573578446, | |
| "epoch": 0.08, | |
| "step": 5100 | |
| }, | |
| { | |
| "loss": 1.2261, | |
| "grad_norm": 2.2151153087615967, | |
| "learning_rate": 0.00018461446036466313, | |
| "epoch": 0.08, | |
| "step": 5125 | |
| }, | |
| { | |
| "loss": 1.3178, | |
| "grad_norm": 2.004448175430298, | |
| "learning_rate": 0.0001845393649935418, | |
| "epoch": 0.08, | |
| "step": 5150 | |
| }, | |
| { | |
| "loss": 1.2419, | |
| "grad_norm": 3.063715934753418, | |
| "learning_rate": 0.0001844642696224205, | |
| "epoch": 0.08, | |
| "step": 5175 | |
| }, | |
| { | |
| "loss": 1.2084, | |
| "grad_norm": 1.432442545890808, | |
| "learning_rate": 0.00018438917425129915, | |
| "epoch": 0.08, | |
| "step": 5200 | |
| }, | |
| { | |
| "loss": 1.2254, | |
| "grad_norm": 2.0840189456939697, | |
| "learning_rate": 0.00018431407888017785, | |
| "epoch": 0.08, | |
| "step": 5225 | |
| }, | |
| { | |
| "loss": 1.1547, | |
| "grad_norm": 1.479894995689392, | |
| "learning_rate": 0.0001842389835090565, | |
| "epoch": 0.08, | |
| "step": 5250 | |
| }, | |
| { | |
| "loss": 1.2749, | |
| "grad_norm": 1.097493290901184, | |
| "learning_rate": 0.0001841638881379352, | |
| "epoch": 0.08, | |
| "step": 5275 | |
| }, | |
| { | |
| "loss": 1.2417, | |
| "grad_norm": 4.6398539543151855, | |
| "learning_rate": 0.00018408879276681387, | |
| "epoch": 0.08, | |
| "step": 5300 | |
| }, | |
| { | |
| "loss": 1.2506, | |
| "grad_norm": 0.9642776250839233, | |
| "learning_rate": 0.00018401369739569253, | |
| "epoch": 0.08, | |
| "step": 5325 | |
| }, | |
| { | |
| "loss": 1.2349, | |
| "grad_norm": 1.2694449424743652, | |
| "learning_rate": 0.0001839386020245712, | |
| "epoch": 0.08, | |
| "step": 5350 | |
| }, | |
| { | |
| "loss": 1.2158, | |
| "grad_norm": 1.2243396043777466, | |
| "learning_rate": 0.0001838635066534499, | |
| "epoch": 0.08, | |
| "step": 5375 | |
| }, | |
| { | |
| "loss": 1.2665, | |
| "grad_norm": 1.859632134437561, | |
| "learning_rate": 0.00018378841128232858, | |
| "epoch": 0.08, | |
| "step": 5400 | |
| }, | |
| { | |
| "loss": 1.2325, | |
| "grad_norm": 1.0260474681854248, | |
| "learning_rate": 0.00018371331591120725, | |
| "epoch": 0.08, | |
| "step": 5425 | |
| }, | |
| { | |
| "loss": 1.2785, | |
| "grad_norm": 1.646101713180542, | |
| "learning_rate": 0.00018363822054008591, | |
| "epoch": 0.08, | |
| "step": 5450 | |
| }, | |
| { | |
| "loss": 1.1545, | |
| "grad_norm": 0.9569182395935059, | |
| "learning_rate": 0.00018356312516896458, | |
| "epoch": 0.08, | |
| "step": 5475 | |
| }, | |
| { | |
| "loss": 1.2172, | |
| "grad_norm": 1.4357048273086548, | |
| "learning_rate": 0.00018348802979784327, | |
| "epoch": 0.08, | |
| "step": 5500 | |
| }, | |
| { | |
| "loss": 1.2557, | |
| "grad_norm": 1.1532384157180786, | |
| "learning_rate": 0.00018341293442672194, | |
| "epoch": 0.08, | |
| "step": 5525 | |
| }, | |
| { | |
| "loss": 1.2258, | |
| "grad_norm": 1.1566283702850342, | |
| "learning_rate": 0.00018333783905560063, | |
| "epoch": 0.08, | |
| "step": 5550 | |
| }, | |
| { | |
| "loss": 1.1953, | |
| "grad_norm": 1.5711147785186768, | |
| "learning_rate": 0.0001832627436844793, | |
| "epoch": 0.08, | |
| "step": 5575 | |
| }, | |
| { | |
| "loss": 1.1736, | |
| "grad_norm": 1.2852180004119873, | |
| "learning_rate": 0.00018318764831335796, | |
| "epoch": 0.08, | |
| "step": 5600 | |
| }, | |
| { | |
| "loss": 1.2572, | |
| "grad_norm": 1.2723329067230225, | |
| "learning_rate": 0.00018311255294223665, | |
| "epoch": 0.08, | |
| "step": 5625 | |
| }, | |
| { | |
| "loss": 1.241, | |
| "grad_norm": 1.6078953742980957, | |
| "learning_rate": 0.00018303745757111532, | |
| "epoch": 0.08, | |
| "step": 5650 | |
| }, | |
| { | |
| "loss": 1.2206, | |
| "grad_norm": 1.821363925933838, | |
| "learning_rate": 0.000182962362199994, | |
| "epoch": 0.09, | |
| "step": 5675 | |
| }, | |
| { | |
| "loss": 1.2775, | |
| "grad_norm": 1.3025563955307007, | |
| "learning_rate": 0.00018288726682887268, | |
| "epoch": 0.09, | |
| "step": 5700 | |
| }, | |
| { | |
| "loss": 1.2296, | |
| "grad_norm": 1.7497808933258057, | |
| "learning_rate": 0.00018281217145775137, | |
| "epoch": 0.09, | |
| "step": 5725 | |
| }, | |
| { | |
| "loss": 1.306, | |
| "grad_norm": 1.5627915859222412, | |
| "learning_rate": 0.00018273707608663003, | |
| "epoch": 0.09, | |
| "step": 5750 | |
| }, | |
| { | |
| "loss": 1.2339, | |
| "grad_norm": 1.217433214187622, | |
| "learning_rate": 0.0001826619807155087, | |
| "epoch": 0.09, | |
| "step": 5775 | |
| }, | |
| { | |
| "loss": 1.2396, | |
| "grad_norm": 2.4516422748565674, | |
| "learning_rate": 0.00018258688534438736, | |
| "epoch": 0.09, | |
| "step": 5800 | |
| }, | |
| { | |
| "loss": 1.2847, | |
| "grad_norm": 1.0460309982299805, | |
| "learning_rate": 0.00018251178997326606, | |
| "epoch": 0.09, | |
| "step": 5825 | |
| }, | |
| { | |
| "loss": 1.2075, | |
| "grad_norm": 1.3472404479980469, | |
| "learning_rate": 0.00018243669460214475, | |
| "epoch": 0.09, | |
| "step": 5850 | |
| }, | |
| { | |
| "loss": 1.1984, | |
| "grad_norm": 1.1247375011444092, | |
| "learning_rate": 0.00018236159923102341, | |
| "epoch": 0.09, | |
| "step": 5875 | |
| }, | |
| { | |
| "loss": 1.1911, | |
| "grad_norm": 1.175132393836975, | |
| "learning_rate": 0.00018228650385990208, | |
| "epoch": 0.09, | |
| "step": 5900 | |
| }, | |
| { | |
| "loss": 1.2405, | |
| "grad_norm": 1.176147699356079, | |
| "learning_rate": 0.00018221140848878075, | |
| "epoch": 0.09, | |
| "step": 5925 | |
| }, | |
| { | |
| "loss": 1.1924, | |
| "grad_norm": 1.0580313205718994, | |
| "learning_rate": 0.00018213631311765944, | |
| "epoch": 0.09, | |
| "step": 5950 | |
| }, | |
| { | |
| "loss": 1.2185, | |
| "grad_norm": 1.1505495309829712, | |
| "learning_rate": 0.0001820612177465381, | |
| "epoch": 0.09, | |
| "step": 5975 | |
| }, | |
| { | |
| "loss": 1.2511, | |
| "grad_norm": 1.1643320322036743, | |
| "learning_rate": 0.0001819861223754168, | |
| "epoch": 0.09, | |
| "step": 6000 | |
| }, | |
| { | |
| "loss": 1.2177, | |
| "grad_norm": 1.3354824781417847, | |
| "learning_rate": 0.00018191102700429546, | |
| "epoch": 0.09, | |
| "step": 6025 | |
| }, | |
| { | |
| "loss": 1.2254, | |
| "grad_norm": 1.547897458076477, | |
| "learning_rate": 0.00018183593163317413, | |
| "epoch": 0.09, | |
| "step": 6050 | |
| }, | |
| { | |
| "loss": 1.2464, | |
| "grad_norm": 1.5310362577438354, | |
| "learning_rate": 0.00018176083626205282, | |
| "epoch": 0.09, | |
| "step": 6075 | |
| }, | |
| { | |
| "loss": 1.2767, | |
| "grad_norm": 1.947996735572815, | |
| "learning_rate": 0.00018168574089093148, | |
| "epoch": 0.09, | |
| "step": 6100 | |
| }, | |
| { | |
| "loss": 1.1888, | |
| "grad_norm": 1.7055374383926392, | |
| "learning_rate": 0.00018161064551981018, | |
| "epoch": 0.09, | |
| "step": 6125 | |
| }, | |
| { | |
| "loss": 1.1704, | |
| "grad_norm": 1.5656442642211914, | |
| "learning_rate": 0.00018153555014868884, | |
| "epoch": 0.09, | |
| "step": 6150 | |
| }, | |
| { | |
| "loss": 1.2117, | |
| "grad_norm": 2.2562601566314697, | |
| "learning_rate": 0.00018146045477756753, | |
| "epoch": 0.09, | |
| "step": 6175 | |
| }, | |
| { | |
| "loss": 1.2164, | |
| "grad_norm": 1.9555296897888184, | |
| "learning_rate": 0.0001813853594064462, | |
| "epoch": 0.09, | |
| "step": 6200 | |
| }, | |
| { | |
| "loss": 1.181, | |
| "grad_norm": 1.350396990776062, | |
| "learning_rate": 0.00018131026403532486, | |
| "epoch": 0.09, | |
| "step": 6225 | |
| }, | |
| { | |
| "loss": 1.1364, | |
| "grad_norm": 1.306662678718567, | |
| "learning_rate": 0.00018123516866420353, | |
| "epoch": 0.09, | |
| "step": 6250 | |
| }, | |
| { | |
| "loss": 1.3395, | |
| "grad_norm": 2.2892208099365234, | |
| "learning_rate": 0.00018116007329308222, | |
| "epoch": 0.09, | |
| "step": 6275 | |
| }, | |
| { | |
| "loss": 1.2155, | |
| "grad_norm": 1.4777361154556274, | |
| "learning_rate": 0.00018108497792196091, | |
| "epoch": 0.09, | |
| "step": 6300 | |
| }, | |
| { | |
| "loss": 1.2512, | |
| "grad_norm": 1.4510390758514404, | |
| "learning_rate": 0.00018100988255083958, | |
| "epoch": 0.09, | |
| "step": 6325 | |
| }, | |
| { | |
| "loss": 1.3135, | |
| "grad_norm": 1.2510138750076294, | |
| "learning_rate": 0.00018093478717971825, | |
| "epoch": 0.1, | |
| "step": 6350 | |
| }, | |
| { | |
| "loss": 1.2027, | |
| "grad_norm": 1.6383109092712402, | |
| "learning_rate": 0.0001808596918085969, | |
| "epoch": 0.1, | |
| "step": 6375 | |
| }, | |
| { | |
| "loss": 1.1677, | |
| "grad_norm": 1.6669763326644897, | |
| "learning_rate": 0.0001807845964374756, | |
| "epoch": 0.1, | |
| "step": 6400 | |
| }, | |
| { | |
| "loss": 1.2344, | |
| "grad_norm": 1.2947137355804443, | |
| "learning_rate": 0.00018070950106635427, | |
| "epoch": 0.1, | |
| "step": 6425 | |
| }, | |
| { | |
| "loss": 1.2195, | |
| "grad_norm": 1.673285961151123, | |
| "learning_rate": 0.00018063440569523296, | |
| "epoch": 0.1, | |
| "step": 6450 | |
| }, | |
| { | |
| "loss": 1.2719, | |
| "grad_norm": 2.102374792098999, | |
| "learning_rate": 0.00018055931032411165, | |
| "epoch": 0.1, | |
| "step": 6475 | |
| }, | |
| { | |
| "loss": 1.2524, | |
| "grad_norm": 1.37187659740448, | |
| "learning_rate": 0.0001804842149529903, | |
| "epoch": 0.1, | |
| "step": 6500 | |
| }, | |
| { | |
| "loss": 1.2169, | |
| "grad_norm": 2.1124603748321533, | |
| "learning_rate": 0.00018040911958186898, | |
| "epoch": 0.1, | |
| "step": 6525 | |
| }, | |
| { | |
| "loss": 1.2206, | |
| "grad_norm": 1.2506129741668701, | |
| "learning_rate": 0.00018033402421074765, | |
| "epoch": 0.1, | |
| "step": 6550 | |
| }, | |
| { | |
| "loss": 1.1774, | |
| "grad_norm": 1.5893070697784424, | |
| "learning_rate": 0.00018025892883962634, | |
| "epoch": 0.1, | |
| "step": 6575 | |
| }, | |
| { | |
| "loss": 1.2101, | |
| "grad_norm": 2.9019079208374023, | |
| "learning_rate": 0.000180183833468505, | |
| "epoch": 0.1, | |
| "step": 6600 | |
| }, | |
| { | |
| "loss": 1.205, | |
| "grad_norm": 1.71237313747406, | |
| "learning_rate": 0.0001801087380973837, | |
| "epoch": 0.1, | |
| "step": 6625 | |
| }, | |
| { | |
| "loss": 1.2044, | |
| "grad_norm": 1.9124983549118042, | |
| "learning_rate": 0.00018003364272626236, | |
| "epoch": 0.1, | |
| "step": 6650 | |
| }, | |
| { | |
| "loss": 1.2125, | |
| "grad_norm": 1.4448764324188232, | |
| "learning_rate": 0.00017995854735514103, | |
| "epoch": 0.1, | |
| "step": 6675 | |
| }, | |
| { | |
| "loss": 1.2076, | |
| "grad_norm": 3.0220255851745605, | |
| "learning_rate": 0.00017988345198401972, | |
| "epoch": 0.1, | |
| "step": 6700 | |
| }, | |
| { | |
| "loss": 1.1899, | |
| "grad_norm": 1.3249489068984985, | |
| "learning_rate": 0.0001798083566128984, | |
| "epoch": 0.1, | |
| "step": 6725 | |
| }, | |
| { | |
| "loss": 1.2352, | |
| "grad_norm": 1.4463756084442139, | |
| "learning_rate": 0.00017973326124177708, | |
| "epoch": 0.1, | |
| "step": 6750 | |
| }, | |
| { | |
| "loss": 1.3259, | |
| "grad_norm": 1.4026572704315186, | |
| "learning_rate": 0.00017965816587065575, | |
| "epoch": 0.1, | |
| "step": 6775 | |
| }, | |
| { | |
| "loss": 1.2282, | |
| "grad_norm": 0.9847255349159241, | |
| "learning_rate": 0.0001795830704995344, | |
| "epoch": 0.1, | |
| "step": 6800 | |
| }, | |
| { | |
| "loss": 1.1899, | |
| "grad_norm": 0.91238933801651, | |
| "learning_rate": 0.00017950797512841308, | |
| "epoch": 0.1, | |
| "step": 6825 | |
| }, | |
| { | |
| "loss": 1.2386, | |
| "grad_norm": 2.1862552165985107, | |
| "learning_rate": 0.00017943287975729177, | |
| "epoch": 0.1, | |
| "step": 6850 | |
| }, | |
| { | |
| "loss": 1.2094, | |
| "grad_norm": 1.10003662109375, | |
| "learning_rate": 0.00017935778438617043, | |
| "epoch": 0.1, | |
| "step": 6875 | |
| }, | |
| { | |
| "loss": 1.2218, | |
| "grad_norm": 1.8453437089920044, | |
| "learning_rate": 0.00017928268901504913, | |
| "epoch": 0.1, | |
| "step": 6900 | |
| }, | |
| { | |
| "loss": 1.262, | |
| "grad_norm": 1.0639673471450806, | |
| "learning_rate": 0.00017920759364392782, | |
| "epoch": 0.1, | |
| "step": 6925 | |
| }, | |
| { | |
| "loss": 1.2188, | |
| "grad_norm": 1.3008592128753662, | |
| "learning_rate": 0.00017913249827280646, | |
| "epoch": 0.1, | |
| "step": 6950 | |
| }, | |
| { | |
| "loss": 1.23, | |
| "grad_norm": 1.7703325748443604, | |
| "learning_rate": 0.00017905740290168515, | |
| "epoch": 0.1, | |
| "step": 6975 | |
| }, | |
| { | |
| "loss": 1.2316, | |
| "grad_norm": 1.1259385347366333, | |
| "learning_rate": 0.00017898230753056381, | |
| "epoch": 0.11, | |
| "step": 7000 | |
| }, | |
| { | |
| "loss": 1.2378, | |
| "grad_norm": 2.1661126613616943, | |
| "learning_rate": 0.0001789072121594425, | |
| "epoch": 0.11, | |
| "step": 7025 | |
| }, | |
| { | |
| "loss": 1.1915, | |
| "grad_norm": 2.1428678035736084, | |
| "learning_rate": 0.00017883211678832117, | |
| "epoch": 0.11, | |
| "step": 7050 | |
| }, | |
| { | |
| "loss": 1.2344, | |
| "grad_norm": 1.4568270444869995, | |
| "learning_rate": 0.00017875702141719986, | |
| "epoch": 0.11, | |
| "step": 7075 | |
| }, | |
| { | |
| "loss": 1.2825, | |
| "grad_norm": 1.5431725978851318, | |
| "learning_rate": 0.00017868192604607853, | |
| "epoch": 0.11, | |
| "step": 7100 | |
| }, | |
| { | |
| "loss": 1.2178, | |
| "grad_norm": 1.2386250495910645, | |
| "learning_rate": 0.0001786068306749572, | |
| "epoch": 0.11, | |
| "step": 7125 | |
| }, | |
| { | |
| "loss": 1.2189, | |
| "grad_norm": 1.1443992853164673, | |
| "learning_rate": 0.0001785317353038359, | |
| "epoch": 0.11, | |
| "step": 7150 | |
| }, | |
| { | |
| "loss": 1.2145, | |
| "grad_norm": 1.0868651866912842, | |
| "learning_rate": 0.00017845663993271455, | |
| "epoch": 0.11, | |
| "step": 7175 | |
| }, | |
| { | |
| "loss": 1.2001, | |
| "grad_norm": 1.2621536254882812, | |
| "learning_rate": 0.00017838154456159325, | |
| "epoch": 0.11, | |
| "step": 7200 | |
| }, | |
| { | |
| "loss": 1.2605, | |
| "grad_norm": 1.3004405498504639, | |
| "learning_rate": 0.0001783064491904719, | |
| "epoch": 0.11, | |
| "step": 7225 | |
| }, | |
| { | |
| "loss": 1.1685, | |
| "grad_norm": 1.7868775129318237, | |
| "learning_rate": 0.00017823135381935058, | |
| "epoch": 0.11, | |
| "step": 7250 | |
| }, | |
| { | |
| "loss": 1.1724, | |
| "grad_norm": 1.525883674621582, | |
| "learning_rate": 0.00017815625844822924, | |
| "epoch": 0.11, | |
| "step": 7275 | |
| }, | |
| { | |
| "loss": 1.1917, | |
| "grad_norm": 1.7897926568984985, | |
| "learning_rate": 0.00017808116307710793, | |
| "epoch": 0.11, | |
| "step": 7300 | |
| }, | |
| { | |
| "loss": 1.17, | |
| "grad_norm": 1.770201325416565, | |
| "learning_rate": 0.0001780060677059866, | |
| "epoch": 0.11, | |
| "step": 7325 | |
| }, | |
| { | |
| "loss": 1.1758, | |
| "grad_norm": 1.23914635181427, | |
| "learning_rate": 0.0001779309723348653, | |
| "epoch": 0.11, | |
| "step": 7350 | |
| }, | |
| { | |
| "loss": 1.266, | |
| "grad_norm": 1.5685780048370361, | |
| "learning_rate": 0.00017785587696374398, | |
| "epoch": 0.11, | |
| "step": 7375 | |
| }, | |
| { | |
| "loss": 1.1515, | |
| "grad_norm": 1.4432404041290283, | |
| "learning_rate": 0.00017778078159262265, | |
| "epoch": 0.11, | |
| "step": 7400 | |
| }, | |
| { | |
| "loss": 1.2221, | |
| "grad_norm": 1.4710851907730103, | |
| "learning_rate": 0.00017770568622150131, | |
| "epoch": 0.11, | |
| "step": 7425 | |
| }, | |
| { | |
| "loss": 1.2636, | |
| "grad_norm": 1.5943934917449951, | |
| "learning_rate": 0.00017763059085037998, | |
| "epoch": 0.11, | |
| "step": 7450 | |
| }, | |
| { | |
| "loss": 1.1915, | |
| "grad_norm": 1.3364222049713135, | |
| "learning_rate": 0.00017755549547925867, | |
| "epoch": 0.11, | |
| "step": 7475 | |
| }, | |
| { | |
| "loss": 1.2061, | |
| "grad_norm": 1.3201831579208374, | |
| "learning_rate": 0.00017748040010813734, | |
| "epoch": 0.11, | |
| "step": 7500 | |
| }, | |
| { | |
| "loss": 1.1852, | |
| "grad_norm": 1.3895928859710693, | |
| "learning_rate": 0.00017740530473701603, | |
| "epoch": 0.11, | |
| "step": 7525 | |
| }, | |
| { | |
| "loss": 1.2233, | |
| "grad_norm": 1.0795204639434814, | |
| "learning_rate": 0.0001773302093658947, | |
| "epoch": 0.11, | |
| "step": 7550 | |
| }, | |
| { | |
| "loss": 1.2034, | |
| "grad_norm": 1.7997777462005615, | |
| "learning_rate": 0.00017725511399477336, | |
| "epoch": 0.11, | |
| "step": 7575 | |
| }, | |
| { | |
| "loss": 1.1786, | |
| "grad_norm": 1.156964898109436, | |
| "learning_rate": 0.00017718001862365205, | |
| "epoch": 0.11, | |
| "step": 7600 | |
| }, | |
| { | |
| "loss": 1.1597, | |
| "grad_norm": 1.6956669092178345, | |
| "learning_rate": 0.00017710492325253072, | |
| "epoch": 0.11, | |
| "step": 7625 | |
| }, | |
| { | |
| "loss": 1.272, | |
| "grad_norm": 1.3330657482147217, | |
| "learning_rate": 0.0001770298278814094, | |
| "epoch": 0.11, | |
| "step": 7650 | |
| }, | |
| { | |
| "loss": 1.2597, | |
| "grad_norm": 0.8610468506813049, | |
| "learning_rate": 0.00017695473251028808, | |
| "epoch": 0.12, | |
| "step": 7675 | |
| }, | |
| { | |
| "loss": 1.2418, | |
| "grad_norm": 1.9568647146224976, | |
| "learning_rate": 0.00017687963713916674, | |
| "epoch": 0.12, | |
| "step": 7700 | |
| }, | |
| { | |
| "loss": 1.0969, | |
| "grad_norm": 1.2442560195922852, | |
| "learning_rate": 0.0001768045417680454, | |
| "epoch": 0.12, | |
| "step": 7725 | |
| }, | |
| { | |
| "loss": 1.1941, | |
| "grad_norm": 1.949724793434143, | |
| "learning_rate": 0.0001767294463969241, | |
| "epoch": 0.12, | |
| "step": 7750 | |
| }, | |
| { | |
| "loss": 1.2424, | |
| "grad_norm": 1.4135985374450684, | |
| "learning_rate": 0.0001766543510258028, | |
| "epoch": 0.12, | |
| "step": 7775 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 1.8493655920028687, | |
| "learning_rate": 0.00017657925565468146, | |
| "epoch": 0.12, | |
| "step": 7800 | |
| }, | |
| { | |
| "loss": 1.2078, | |
| "grad_norm": 1.921787977218628, | |
| "learning_rate": 0.00017650416028356015, | |
| "epoch": 0.12, | |
| "step": 7825 | |
| }, | |
| { | |
| "loss": 1.2593, | |
| "grad_norm": 1.7355767488479614, | |
| "learning_rate": 0.00017642906491243881, | |
| "epoch": 0.12, | |
| "step": 7850 | |
| }, | |
| { | |
| "loss": 1.2583, | |
| "grad_norm": 2.5150203704833984, | |
| "learning_rate": 0.00017635396954131748, | |
| "epoch": 0.12, | |
| "step": 7875 | |
| }, | |
| { | |
| "loss": 1.1617, | |
| "grad_norm": 1.4067972898483276, | |
| "learning_rate": 0.00017627887417019615, | |
| "epoch": 0.12, | |
| "step": 7900 | |
| }, | |
| { | |
| "loss": 1.2691, | |
| "grad_norm": 1.6826621294021606, | |
| "learning_rate": 0.00017620377879907484, | |
| "epoch": 0.12, | |
| "step": 7925 | |
| }, | |
| { | |
| "loss": 1.1767, | |
| "grad_norm": 1.0282503366470337, | |
| "learning_rate": 0.0001761286834279535, | |
| "epoch": 0.12, | |
| "step": 7950 | |
| }, | |
| { | |
| "loss": 1.1887, | |
| "grad_norm": 2.0548410415649414, | |
| "learning_rate": 0.0001760535880568322, | |
| "epoch": 0.12, | |
| "step": 7975 | |
| }, | |
| { | |
| "loss": 1.2639, | |
| "grad_norm": 1.1392240524291992, | |
| "learning_rate": 0.00017597849268571086, | |
| "epoch": 0.12, | |
| "step": 8000 | |
| }, | |
| { | |
| "loss": 1.1991, | |
| "grad_norm": 1.8351316452026367, | |
| "learning_rate": 0.00017590339731458953, | |
| "epoch": 0.12, | |
| "step": 8025 | |
| }, | |
| { | |
| "loss": 1.265, | |
| "grad_norm": 1.801256775856018, | |
| "learning_rate": 0.00017582830194346822, | |
| "epoch": 0.12, | |
| "step": 8050 | |
| }, | |
| { | |
| "loss": 1.2895, | |
| "grad_norm": 1.379420280456543, | |
| "learning_rate": 0.00017575320657234688, | |
| "epoch": 0.12, | |
| "step": 8075 | |
| }, | |
| { | |
| "loss": 1.1896, | |
| "grad_norm": 1.7537370920181274, | |
| "learning_rate": 0.00017567811120122558, | |
| "epoch": 0.12, | |
| "step": 8100 | |
| }, | |
| { | |
| "loss": 1.2039, | |
| "grad_norm": 2.159595489501953, | |
| "learning_rate": 0.00017560301583010424, | |
| "epoch": 0.12, | |
| "step": 8125 | |
| }, | |
| { | |
| "loss": 1.2145, | |
| "grad_norm": 1.690534234046936, | |
| "learning_rate": 0.0001755279204589829, | |
| "epoch": 0.12, | |
| "step": 8150 | |
| }, | |
| { | |
| "loss": 1.1935, | |
| "grad_norm": 1.0568920373916626, | |
| "learning_rate": 0.00017545282508786157, | |
| "epoch": 0.12, | |
| "step": 8175 | |
| }, | |
| { | |
| "loss": 1.2304, | |
| "grad_norm": 1.6981247663497925, | |
| "learning_rate": 0.00017537772971674026, | |
| "epoch": 0.12, | |
| "step": 8200 | |
| }, | |
| { | |
| "loss": 1.1961, | |
| "grad_norm": 2.1610305309295654, | |
| "learning_rate": 0.00017530263434561896, | |
| "epoch": 0.12, | |
| "step": 8225 | |
| }, | |
| { | |
| "loss": 1.2387, | |
| "grad_norm": 1.8722481727600098, | |
| "learning_rate": 0.00017522753897449762, | |
| "epoch": 0.12, | |
| "step": 8250 | |
| }, | |
| { | |
| "loss": 1.2477, | |
| "grad_norm": 2.1257529258728027, | |
| "learning_rate": 0.00017515244360337631, | |
| "epoch": 0.12, | |
| "step": 8275 | |
| }, | |
| { | |
| "loss": 1.173, | |
| "grad_norm": 2.786665439605713, | |
| "learning_rate": 0.00017507734823225498, | |
| "epoch": 0.12, | |
| "step": 8300 | |
| }, | |
| { | |
| "loss": 1.3121, | |
| "grad_norm": 1.4147156476974487, | |
| "learning_rate": 0.00017500225286113365, | |
| "epoch": 0.13, | |
| "step": 8325 | |
| }, | |
| { | |
| "loss": 1.2495, | |
| "grad_norm": 1.6025553941726685, | |
| "learning_rate": 0.0001749271574900123, | |
| "epoch": 0.13, | |
| "step": 8350 | |
| }, | |
| { | |
| "loss": 1.2802, | |
| "grad_norm": 1.6167206764221191, | |
| "learning_rate": 0.000174852062118891, | |
| "epoch": 0.13, | |
| "step": 8375 | |
| }, | |
| { | |
| "loss": 1.2587, | |
| "grad_norm": 1.346677303314209, | |
| "learning_rate": 0.00017477696674776967, | |
| "epoch": 0.13, | |
| "step": 8400 | |
| }, | |
| { | |
| "loss": 1.1743, | |
| "grad_norm": 1.8484021425247192, | |
| "learning_rate": 0.00017470187137664836, | |
| "epoch": 0.13, | |
| "step": 8425 | |
| }, | |
| { | |
| "loss": 1.2875, | |
| "grad_norm": 2.0601062774658203, | |
| "learning_rate": 0.00017462677600552703, | |
| "epoch": 0.13, | |
| "step": 8450 | |
| }, | |
| { | |
| "loss": 1.1846, | |
| "grad_norm": 1.455112338066101, | |
| "learning_rate": 0.0001745516806344057, | |
| "epoch": 0.13, | |
| "step": 8475 | |
| }, | |
| { | |
| "loss": 1.2606, | |
| "grad_norm": 1.336016058921814, | |
| "learning_rate": 0.00017447658526328438, | |
| "epoch": 0.13, | |
| "step": 8500 | |
| }, | |
| { | |
| "loss": 1.2648, | |
| "grad_norm": 0.9691543579101562, | |
| "learning_rate": 0.00017440148989216305, | |
| "epoch": 0.13, | |
| "step": 8525 | |
| }, | |
| { | |
| "loss": 1.2686, | |
| "grad_norm": 1.4051158428192139, | |
| "learning_rate": 0.00017432639452104174, | |
| "epoch": 0.13, | |
| "step": 8550 | |
| }, | |
| { | |
| "loss": 1.2357, | |
| "grad_norm": 1.1069400310516357, | |
| "learning_rate": 0.0001742512991499204, | |
| "epoch": 0.13, | |
| "step": 8575 | |
| }, | |
| { | |
| "loss": 1.243, | |
| "grad_norm": 1.2926398515701294, | |
| "learning_rate": 0.00017417920759364394, | |
| "epoch": 0.13, | |
| "step": 8600 | |
| }, | |
| { | |
| "loss": 1.2178, | |
| "grad_norm": 2.581450939178467, | |
| "learning_rate": 0.0001741041122225226, | |
| "epoch": 0.13, | |
| "step": 8625 | |
| }, | |
| { | |
| "loss": 1.2037, | |
| "grad_norm": 3.525554656982422, | |
| "learning_rate": 0.0001740290168514013, | |
| "epoch": 0.13, | |
| "step": 8650 | |
| }, | |
| { | |
| "loss": 1.2447, | |
| "grad_norm": 1.7870151996612549, | |
| "learning_rate": 0.00017395392148027996, | |
| "epoch": 0.13, | |
| "step": 8675 | |
| }, | |
| { | |
| "loss": 1.2299, | |
| "grad_norm": 1.8541524410247803, | |
| "learning_rate": 0.00017387882610915865, | |
| "epoch": 0.13, | |
| "step": 8700 | |
| }, | |
| { | |
| "loss": 1.2425, | |
| "grad_norm": 1.767638921737671, | |
| "learning_rate": 0.00017380373073803732, | |
| "epoch": 0.13, | |
| "step": 8725 | |
| }, | |
| { | |
| "loss": 1.2167, | |
| "grad_norm": 1.1607838869094849, | |
| "learning_rate": 0.00017372863536691598, | |
| "epoch": 0.13, | |
| "step": 8750 | |
| }, | |
| { | |
| "loss": 1.2148, | |
| "grad_norm": 2.044637441635132, | |
| "learning_rate": 0.00017365353999579467, | |
| "epoch": 0.13, | |
| "step": 8775 | |
| }, | |
| { | |
| "loss": 1.1762, | |
| "grad_norm": 1.519467830657959, | |
| "learning_rate": 0.00017357844462467334, | |
| "epoch": 0.13, | |
| "step": 8800 | |
| }, | |
| { | |
| "loss": 1.2796, | |
| "grad_norm": 1.461225986480713, | |
| "learning_rate": 0.00017350334925355203, | |
| "epoch": 0.13, | |
| "step": 8825 | |
| }, | |
| { | |
| "loss": 1.2516, | |
| "grad_norm": 1.52583646774292, | |
| "learning_rate": 0.0001734282538824307, | |
| "epoch": 0.13, | |
| "step": 8850 | |
| }, | |
| { | |
| "loss": 1.1365, | |
| "grad_norm": 1.617851734161377, | |
| "learning_rate": 0.0001733531585113094, | |
| "epoch": 0.13, | |
| "step": 8875 | |
| }, | |
| { | |
| "loss": 1.1494, | |
| "grad_norm": 1.112012267112732, | |
| "learning_rate": 0.00017327806314018803, | |
| "epoch": 0.13, | |
| "step": 8900 | |
| }, | |
| { | |
| "loss": 1.199, | |
| "grad_norm": 1.9818586111068726, | |
| "learning_rate": 0.00017320296776906672, | |
| "epoch": 0.13, | |
| "step": 8925 | |
| }, | |
| { | |
| "loss": 1.2784, | |
| "grad_norm": 1.3736644983291626, | |
| "learning_rate": 0.00017312787239794539, | |
| "epoch": 0.13, | |
| "step": 8950 | |
| }, | |
| { | |
| "loss": 1.2714, | |
| "grad_norm": 1.3875724077224731, | |
| "learning_rate": 0.00017305277702682408, | |
| "epoch": 0.13, | |
| "step": 8975 | |
| }, | |
| { | |
| "loss": 1.2155, | |
| "grad_norm": 1.4930505752563477, | |
| "learning_rate": 0.00017297768165570277, | |
| "epoch": 0.14, | |
| "step": 9000 | |
| }, | |
| { | |
| "loss": 1.237, | |
| "grad_norm": 1.9728326797485352, | |
| "learning_rate": 0.00017290258628458144, | |
| "epoch": 0.14, | |
| "step": 9025 | |
| }, | |
| { | |
| "loss": 1.1828, | |
| "grad_norm": 1.2152589559555054, | |
| "learning_rate": 0.0001728274909134601, | |
| "epoch": 0.14, | |
| "step": 9050 | |
| }, | |
| { | |
| "loss": 1.2602, | |
| "grad_norm": 2.241239547729492, | |
| "learning_rate": 0.00017275239554233877, | |
| "epoch": 0.14, | |
| "step": 9075 | |
| }, | |
| { | |
| "loss": 1.2174, | |
| "grad_norm": 2.192455768585205, | |
| "learning_rate": 0.00017267730017121746, | |
| "epoch": 0.14, | |
| "step": 9100 | |
| }, | |
| { | |
| "loss": 1.2197, | |
| "grad_norm": 1.2781050205230713, | |
| "learning_rate": 0.00017260220480009612, | |
| "epoch": 0.14, | |
| "step": 9125 | |
| }, | |
| { | |
| "loss": 1.1877, | |
| "grad_norm": 1.4289566278457642, | |
| "learning_rate": 0.00017252710942897482, | |
| "epoch": 0.14, | |
| "step": 9150 | |
| }, | |
| { | |
| "loss": 1.2482, | |
| "grad_norm": 1.4809447526931763, | |
| "learning_rate": 0.00017245201405785348, | |
| "epoch": 0.14, | |
| "step": 9175 | |
| }, | |
| { | |
| "loss": 1.2194, | |
| "grad_norm": 1.5703109502792358, | |
| "learning_rate": 0.00017237691868673215, | |
| "epoch": 0.14, | |
| "step": 9200 | |
| }, | |
| { | |
| "loss": 1.2036, | |
| "grad_norm": 1.5251587629318237, | |
| "learning_rate": 0.00017230182331561084, | |
| "epoch": 0.14, | |
| "step": 9225 | |
| }, | |
| { | |
| "loss": 1.2469, | |
| "grad_norm": 0.9070261120796204, | |
| "learning_rate": 0.0001722267279444895, | |
| "epoch": 0.14, | |
| "step": 9250 | |
| }, | |
| { | |
| "loss": 1.2316, | |
| "grad_norm": 2.398056745529175, | |
| "learning_rate": 0.0001721516325733682, | |
| "epoch": 0.14, | |
| "step": 9275 | |
| }, | |
| { | |
| "loss": 1.2225, | |
| "grad_norm": 1.3680628538131714, | |
| "learning_rate": 0.00017207653720224686, | |
| "epoch": 0.14, | |
| "step": 9300 | |
| }, | |
| { | |
| "loss": 1.2154, | |
| "grad_norm": 1.577989935874939, | |
| "learning_rate": 0.00017200144183112556, | |
| "epoch": 0.14, | |
| "step": 9325 | |
| }, | |
| { | |
| "loss": 1.1482, | |
| "grad_norm": 1.868891716003418, | |
| "learning_rate": 0.0001719263464600042, | |
| "epoch": 0.14, | |
| "step": 9350 | |
| }, | |
| { | |
| "loss": 1.2566, | |
| "grad_norm": 2.225888729095459, | |
| "learning_rate": 0.00017185125108888289, | |
| "epoch": 0.14, | |
| "step": 9375 | |
| }, | |
| { | |
| "loss": 1.3212, | |
| "grad_norm": 1.519579529762268, | |
| "learning_rate": 0.00017177615571776155, | |
| "epoch": 0.14, | |
| "step": 9400 | |
| }, | |
| { | |
| "loss": 1.2287, | |
| "grad_norm": 1.0716164112091064, | |
| "learning_rate": 0.00017170106034664024, | |
| "epoch": 0.14, | |
| "step": 9425 | |
| }, | |
| { | |
| "loss": 1.1992, | |
| "grad_norm": 1.8280526399612427, | |
| "learning_rate": 0.00017162596497551894, | |
| "epoch": 0.14, | |
| "step": 9450 | |
| }, | |
| { | |
| "loss": 1.1633, | |
| "grad_norm": 1.8167325258255005, | |
| "learning_rate": 0.0001715508696043976, | |
| "epoch": 0.14, | |
| "step": 9475 | |
| }, | |
| { | |
| "loss": 1.2104, | |
| "grad_norm": 1.3616135120391846, | |
| "learning_rate": 0.00017147577423327627, | |
| "epoch": 0.14, | |
| "step": 9500 | |
| }, | |
| { | |
| "loss": 1.1943, | |
| "grad_norm": 2.055335283279419, | |
| "learning_rate": 0.00017140067886215493, | |
| "epoch": 0.14, | |
| "step": 9525 | |
| }, | |
| { | |
| "loss": 1.1744, | |
| "grad_norm": 1.173204779624939, | |
| "learning_rate": 0.00017132558349103362, | |
| "epoch": 0.14, | |
| "step": 9550 | |
| }, | |
| { | |
| "loss": 1.204, | |
| "grad_norm": 1.9650391340255737, | |
| "learning_rate": 0.0001712504881199123, | |
| "epoch": 0.14, | |
| "step": 9575 | |
| }, | |
| { | |
| "loss": 1.1757, | |
| "grad_norm": 1.167233943939209, | |
| "learning_rate": 0.00017117539274879098, | |
| "epoch": 0.14, | |
| "step": 9600 | |
| }, | |
| { | |
| "loss": 1.1676, | |
| "grad_norm": 1.160571575164795, | |
| "learning_rate": 0.00017110029737766965, | |
| "epoch": 0.14, | |
| "step": 9625 | |
| }, | |
| { | |
| "loss": 1.2729, | |
| "grad_norm": 1.3096935749053955, | |
| "learning_rate": 0.0001710252020065483, | |
| "epoch": 0.14, | |
| "step": 9650 | |
| }, | |
| { | |
| "loss": 1.1832, | |
| "grad_norm": 1.2549477815628052, | |
| "learning_rate": 0.000170950106635427, | |
| "epoch": 0.15, | |
| "step": 9675 | |
| }, | |
| { | |
| "loss": 1.1984, | |
| "grad_norm": 1.156880497932434, | |
| "learning_rate": 0.00017087501126430567, | |
| "epoch": 0.15, | |
| "step": 9700 | |
| }, | |
| { | |
| "loss": 1.2719, | |
| "grad_norm": 1.974812626838684, | |
| "learning_rate": 0.00017079991589318436, | |
| "epoch": 0.15, | |
| "step": 9725 | |
| }, | |
| { | |
| "loss": 1.2269, | |
| "grad_norm": 1.3916237354278564, | |
| "learning_rate": 0.00017072482052206303, | |
| "epoch": 0.15, | |
| "step": 9750 | |
| }, | |
| { | |
| "loss": 1.2587, | |
| "grad_norm": 1.1099380254745483, | |
| "learning_rate": 0.00017064972515094172, | |
| "epoch": 0.15, | |
| "step": 9775 | |
| }, | |
| { | |
| "loss": 1.171, | |
| "grad_norm": 1.169327735900879, | |
| "learning_rate": 0.00017057462977982036, | |
| "epoch": 0.15, | |
| "step": 9800 | |
| }, | |
| { | |
| "loss": 1.1879, | |
| "grad_norm": 1.3058301210403442, | |
| "learning_rate": 0.00017049953440869905, | |
| "epoch": 0.15, | |
| "step": 9825 | |
| }, | |
| { | |
| "loss": 1.3067, | |
| "grad_norm": 1.6860321760177612, | |
| "learning_rate": 0.00017042443903757772, | |
| "epoch": 0.15, | |
| "step": 9850 | |
| }, | |
| { | |
| "loss": 1.2436, | |
| "grad_norm": 1.7748676538467407, | |
| "learning_rate": 0.0001703493436664564, | |
| "epoch": 0.15, | |
| "step": 9875 | |
| }, | |
| { | |
| "loss": 1.2847, | |
| "grad_norm": 1.3805527687072754, | |
| "learning_rate": 0.0001702742482953351, | |
| "epoch": 0.15, | |
| "step": 9900 | |
| }, | |
| { | |
| "loss": 1.1507, | |
| "grad_norm": 1.1719329357147217, | |
| "learning_rate": 0.00017019915292421377, | |
| "epoch": 0.15, | |
| "step": 9925 | |
| }, | |
| { | |
| "loss": 1.1982, | |
| "grad_norm": 3.254032850265503, | |
| "learning_rate": 0.00017012405755309243, | |
| "epoch": 0.15, | |
| "step": 9950 | |
| }, | |
| { | |
| "loss": 1.2556, | |
| "grad_norm": 1.6937150955200195, | |
| "learning_rate": 0.0001700489621819711, | |
| "epoch": 0.15, | |
| "step": 9975 | |
| }, | |
| { | |
| "loss": 1.1951, | |
| "grad_norm": 0.9732112884521484, | |
| "learning_rate": 0.0001699738668108498, | |
| "epoch": 0.15, | |
| "step": 10000 | |
| }, | |
| { | |
| "loss": 1.2225, | |
| "grad_norm": 1.152357816696167, | |
| "learning_rate": 0.00016989877143972845, | |
| "epoch": 0.15, | |
| "step": 10025 | |
| }, | |
| { | |
| "loss": 1.2185, | |
| "grad_norm": 1.9174104928970337, | |
| "learning_rate": 0.00016982367606860715, | |
| "epoch": 0.15, | |
| "step": 10050 | |
| }, | |
| { | |
| "loss": 1.1886, | |
| "grad_norm": 2.638831377029419, | |
| "learning_rate": 0.0001697485806974858, | |
| "epoch": 0.15, | |
| "step": 10075 | |
| }, | |
| { | |
| "loss": 1.2805, | |
| "grad_norm": 1.4505808353424072, | |
| "learning_rate": 0.00016967348532636448, | |
| "epoch": 0.15, | |
| "step": 10100 | |
| }, | |
| { | |
| "loss": 1.2714, | |
| "grad_norm": 1.9908664226531982, | |
| "learning_rate": 0.00016959838995524317, | |
| "epoch": 0.15, | |
| "step": 10125 | |
| }, | |
| { | |
| "loss": 1.2362, | |
| "grad_norm": 0.9299131035804749, | |
| "learning_rate": 0.00016952329458412184, | |
| "epoch": 0.15, | |
| "step": 10150 | |
| }, | |
| { | |
| "loss": 1.212, | |
| "grad_norm": 2.036597490310669, | |
| "learning_rate": 0.00016944819921300053, | |
| "epoch": 0.15, | |
| "step": 10175 | |
| }, | |
| { | |
| "loss": 1.1789, | |
| "grad_norm": 0.9963513016700745, | |
| "learning_rate": 0.0001693731038418792, | |
| "epoch": 0.15, | |
| "step": 10200 | |
| }, | |
| { | |
| "loss": 1.2206, | |
| "grad_norm": 1.2980750799179077, | |
| "learning_rate": 0.00016929800847075789, | |
| "epoch": 0.15, | |
| "step": 10225 | |
| }, | |
| { | |
| "loss": 1.2015, | |
| "grad_norm": 2.1614062786102295, | |
| "learning_rate": 0.00016922291309963652, | |
| "epoch": 0.15, | |
| "step": 10250 | |
| }, | |
| { | |
| "loss": 1.2411, | |
| "grad_norm": 2.045715093612671, | |
| "learning_rate": 0.00016914781772851522, | |
| "epoch": 0.15, | |
| "step": 10275 | |
| }, | |
| { | |
| "loss": 1.2117, | |
| "grad_norm": 1.5198246240615845, | |
| "learning_rate": 0.0001690727223573939, | |
| "epoch": 0.15, | |
| "step": 10300 | |
| }, | |
| { | |
| "loss": 1.2269, | |
| "grad_norm": 1.563063383102417, | |
| "learning_rate": 0.00016900063080111744, | |
| "epoch": 0.16, | |
| "step": 10325 | |
| }, | |
| { | |
| "loss": 1.2857, | |
| "grad_norm": 1.2115886211395264, | |
| "learning_rate": 0.0001689255354299961, | |
| "epoch": 0.16, | |
| "step": 10350 | |
| }, | |
| { | |
| "loss": 1.2339, | |
| "grad_norm": 1.6522163152694702, | |
| "learning_rate": 0.00016885044005887477, | |
| "epoch": 0.16, | |
| "step": 10375 | |
| }, | |
| { | |
| "loss": 1.2416, | |
| "grad_norm": 1.334186315536499, | |
| "learning_rate": 0.00016877534468775343, | |
| "epoch": 0.16, | |
| "step": 10400 | |
| }, | |
| { | |
| "loss": 1.1811, | |
| "grad_norm": 2.520540475845337, | |
| "learning_rate": 0.00016870024931663213, | |
| "epoch": 0.16, | |
| "step": 10425 | |
| }, | |
| { | |
| "loss": 1.2504, | |
| "grad_norm": 1.4244968891143799, | |
| "learning_rate": 0.00016862515394551082, | |
| "epoch": 0.16, | |
| "step": 10450 | |
| }, | |
| { | |
| "loss": 1.3024, | |
| "grad_norm": 1.7212327718734741, | |
| "learning_rate": 0.00016855005857438948, | |
| "epoch": 0.16, | |
| "step": 10475 | |
| }, | |
| { | |
| "loss": 1.2481, | |
| "grad_norm": 1.369234323501587, | |
| "learning_rate": 0.00016847496320326818, | |
| "epoch": 0.16, | |
| "step": 10500 | |
| }, | |
| { | |
| "loss": 1.2456, | |
| "grad_norm": 1.2592421770095825, | |
| "learning_rate": 0.00016839986783214684, | |
| "epoch": 0.16, | |
| "step": 10525 | |
| }, | |
| { | |
| "loss": 1.2845, | |
| "grad_norm": 1.891320824623108, | |
| "learning_rate": 0.0001683247724610255, | |
| "epoch": 0.16, | |
| "step": 10550 | |
| }, | |
| { | |
| "loss": 1.1581, | |
| "grad_norm": 1.653385877609253, | |
| "learning_rate": 0.00016824967708990417, | |
| "epoch": 0.16, | |
| "step": 10575 | |
| }, | |
| { | |
| "loss": 1.2469, | |
| "grad_norm": 1.4522532224655151, | |
| "learning_rate": 0.00016817458171878286, | |
| "epoch": 0.16, | |
| "step": 10600 | |
| }, | |
| { | |
| "loss": 1.1964, | |
| "grad_norm": 2.0278687477111816, | |
| "learning_rate": 0.00016809948634766153, | |
| "epoch": 0.16, | |
| "step": 10625 | |
| }, | |
| { | |
| "loss": 1.1826, | |
| "grad_norm": 7.241672039031982, | |
| "learning_rate": 0.00016802439097654022, | |
| "epoch": 0.16, | |
| "step": 10650 | |
| }, | |
| { | |
| "loss": 1.2321, | |
| "grad_norm": 1.5438281297683716, | |
| "learning_rate": 0.0001679553032351086, | |
| "epoch": 0.16, | |
| "step": 10675 | |
| }, | |
| { | |
| "loss": 1.1776, | |
| "grad_norm": 1.0334819555282593, | |
| "learning_rate": 0.00016788020786398728, | |
| "epoch": 0.16, | |
| "step": 10700 | |
| }, | |
| { | |
| "loss": 1.2905, | |
| "grad_norm": 1.578046202659607, | |
| "learning_rate": 0.0001678081163077108, | |
| "epoch": 0.16, | |
| "step": 10725 | |
| }, | |
| { | |
| "loss": 1.1721, | |
| "grad_norm": 1.1447938680648804, | |
| "learning_rate": 0.00016773302093658947, | |
| "epoch": 0.16, | |
| "step": 10750 | |
| }, | |
| { | |
| "loss": 1.1305, | |
| "grad_norm": 1.328674554824829, | |
| "learning_rate": 0.00016765792556546817, | |
| "epoch": 0.16, | |
| "step": 10775 | |
| }, | |
| { | |
| "loss": 1.1729, | |
| "grad_norm": 1.5958043336868286, | |
| "learning_rate": 0.00016758283019434683, | |
| "epoch": 0.16, | |
| "step": 10800 | |
| }, | |
| { | |
| "loss": 1.1459, | |
| "grad_norm": 1.4962025880813599, | |
| "learning_rate": 0.0001675077348232255, | |
| "epoch": 0.16, | |
| "step": 10825 | |
| }, | |
| { | |
| "loss": 1.1864, | |
| "grad_norm": 1.540279507637024, | |
| "learning_rate": 0.0001674326394521042, | |
| "epoch": 0.16, | |
| "step": 10850 | |
| }, | |
| { | |
| "loss": 1.2842, | |
| "grad_norm": 1.6456973552703857, | |
| "learning_rate": 0.00016735754408098285, | |
| "epoch": 0.16, | |
| "step": 10875 | |
| }, | |
| { | |
| "loss": 1.2236, | |
| "grad_norm": 1.9610776901245117, | |
| "learning_rate": 0.00016728244870986155, | |
| "epoch": 0.16, | |
| "step": 10900 | |
| }, | |
| { | |
| "loss": 1.1825, | |
| "grad_norm": 1.4995285272598267, | |
| "learning_rate": 0.0001672073533387402, | |
| "epoch": 0.16, | |
| "step": 10925 | |
| }, | |
| { | |
| "loss": 1.2341, | |
| "grad_norm": 1.0755623579025269, | |
| "learning_rate": 0.00016713225796761888, | |
| "epoch": 0.16, | |
| "step": 10950 | |
| }, | |
| { | |
| "loss": 1.2119, | |
| "grad_norm": 1.8127145767211914, | |
| "learning_rate": 0.00016705716259649754, | |
| "epoch": 0.16, | |
| "step": 10975 | |
| }, | |
| { | |
| "loss": 1.2211, | |
| "grad_norm": 1.8502295017242432, | |
| "learning_rate": 0.00016698206722537623, | |
| "epoch": 0.17, | |
| "step": 11000 | |
| }, | |
| { | |
| "loss": 1.2158, | |
| "grad_norm": 1.6311461925506592, | |
| "learning_rate": 0.0001669069718542549, | |
| "epoch": 0.17, | |
| "step": 11025 | |
| }, | |
| { | |
| "loss": 1.2579, | |
| "grad_norm": 1.3985036611557007, | |
| "learning_rate": 0.0001668318764831336, | |
| "epoch": 0.17, | |
| "step": 11050 | |
| }, | |
| { | |
| "loss": 1.1595, | |
| "grad_norm": 2.4565913677215576, | |
| "learning_rate": 0.00016675678111201228, | |
| "epoch": 0.17, | |
| "step": 11075 | |
| }, | |
| { | |
| "loss": 1.2151, | |
| "grad_norm": 1.9943021535873413, | |
| "learning_rate": 0.00016668168574089092, | |
| "epoch": 0.17, | |
| "step": 11100 | |
| }, | |
| { | |
| "loss": 1.2253, | |
| "grad_norm": 1.246576189994812, | |
| "learning_rate": 0.00016660659036976962, | |
| "epoch": 0.17, | |
| "step": 11125 | |
| }, | |
| { | |
| "loss": 1.1946, | |
| "grad_norm": 1.1769171953201294, | |
| "learning_rate": 0.00016653149499864828, | |
| "epoch": 0.17, | |
| "step": 11150 | |
| }, | |
| { | |
| "loss": 1.2189, | |
| "grad_norm": 1.2264093160629272, | |
| "learning_rate": 0.00016645639962752697, | |
| "epoch": 0.17, | |
| "step": 11175 | |
| }, | |
| { | |
| "loss": 1.1773, | |
| "grad_norm": 1.0736924409866333, | |
| "learning_rate": 0.00016638130425640564, | |
| "epoch": 0.17, | |
| "step": 11200 | |
| }, | |
| { | |
| "loss": 1.2057, | |
| "grad_norm": 1.527783751487732, | |
| "learning_rate": 0.00016630620888528433, | |
| "epoch": 0.17, | |
| "step": 11225 | |
| }, | |
| { | |
| "loss": 1.2292, | |
| "grad_norm": 1.3747711181640625, | |
| "learning_rate": 0.000166231113514163, | |
| "epoch": 0.17, | |
| "step": 11250 | |
| }, | |
| { | |
| "loss": 1.2385, | |
| "grad_norm": 1.5345367193222046, | |
| "learning_rate": 0.00016615601814304166, | |
| "epoch": 0.17, | |
| "step": 11275 | |
| }, | |
| { | |
| "loss": 1.2456, | |
| "grad_norm": 1.1881415843963623, | |
| "learning_rate": 0.00016608092277192035, | |
| "epoch": 0.17, | |
| "step": 11300 | |
| }, | |
| { | |
| "loss": 1.2099, | |
| "grad_norm": 1.1072256565093994, | |
| "learning_rate": 0.00016600582740079902, | |
| "epoch": 0.17, | |
| "step": 11325 | |
| }, | |
| { | |
| "loss": 1.2128, | |
| "grad_norm": 2.2356455326080322, | |
| "learning_rate": 0.0001659307320296777, | |
| "epoch": 0.17, | |
| "step": 11350 | |
| }, | |
| { | |
| "loss": 1.1638, | |
| "grad_norm": 3.343397855758667, | |
| "learning_rate": 0.00016585563665855638, | |
| "epoch": 0.17, | |
| "step": 11375 | |
| }, | |
| { | |
| "loss": 1.1879, | |
| "grad_norm": 1.5066440105438232, | |
| "learning_rate": 0.00016578054128743504, | |
| "epoch": 0.17, | |
| "step": 11400 | |
| }, | |
| { | |
| "loss": 1.1868, | |
| "grad_norm": 1.4099555015563965, | |
| "learning_rate": 0.0001657054459163137, | |
| "epoch": 0.17, | |
| "step": 11425 | |
| }, | |
| { | |
| "loss": 1.1882, | |
| "grad_norm": 1.6867519617080688, | |
| "learning_rate": 0.0001656303505451924, | |
| "epoch": 0.17, | |
| "step": 11450 | |
| }, | |
| { | |
| "loss": 1.1986, | |
| "grad_norm": 1.4436876773834229, | |
| "learning_rate": 0.0001655552551740711, | |
| "epoch": 0.17, | |
| "step": 11475 | |
| }, | |
| { | |
| "loss": 1.2023, | |
| "grad_norm": 1.2447092533111572, | |
| "learning_rate": 0.00016548015980294976, | |
| "epoch": 0.17, | |
| "step": 11500 | |
| }, | |
| { | |
| "loss": 1.1136, | |
| "grad_norm": 2.2803616523742676, | |
| "learning_rate": 0.00016540506443182845, | |
| "epoch": 0.17, | |
| "step": 11525 | |
| }, | |
| { | |
| "loss": 1.2135, | |
| "grad_norm": 2.992870807647705, | |
| "learning_rate": 0.00016532996906070712, | |
| "epoch": 0.17, | |
| "step": 11550 | |
| }, | |
| { | |
| "loss": 1.1864, | |
| "grad_norm": 1.6845029592514038, | |
| "learning_rate": 0.00016525487368958578, | |
| "epoch": 0.17, | |
| "step": 11575 | |
| }, | |
| { | |
| "loss": 1.1924, | |
| "grad_norm": 1.52716863155365, | |
| "learning_rate": 0.00016517977831846445, | |
| "epoch": 0.17, | |
| "step": 11600 | |
| }, | |
| { | |
| "loss": 1.2462, | |
| "grad_norm": 1.1273847818374634, | |
| "learning_rate": 0.00016510468294734314, | |
| "epoch": 0.17, | |
| "step": 11625 | |
| }, | |
| { | |
| "loss": 1.2411, | |
| "grad_norm": 1.472161054611206, | |
| "learning_rate": 0.0001650295875762218, | |
| "epoch": 0.17, | |
| "step": 11650 | |
| }, | |
| { | |
| "loss": 1.2534, | |
| "grad_norm": 1.0381845235824585, | |
| "learning_rate": 0.0001649544922051005, | |
| "epoch": 0.18, | |
| "step": 11675 | |
| }, | |
| { | |
| "loss": 1.1666, | |
| "grad_norm": 1.7663735151290894, | |
| "learning_rate": 0.00016487939683397916, | |
| "epoch": 0.18, | |
| "step": 11700 | |
| }, | |
| { | |
| "loss": 1.1811, | |
| "grad_norm": 1.8977352380752563, | |
| "learning_rate": 0.00016480430146285783, | |
| "epoch": 0.18, | |
| "step": 11725 | |
| }, | |
| { | |
| "loss": 1.2674, | |
| "grad_norm": 1.2944955825805664, | |
| "learning_rate": 0.00016472920609173652, | |
| "epoch": 0.18, | |
| "step": 11750 | |
| }, | |
| { | |
| "loss": 1.259, | |
| "grad_norm": 1.2735075950622559, | |
| "learning_rate": 0.00016465411072061518, | |
| "epoch": 0.18, | |
| "step": 11775 | |
| }, | |
| { | |
| "loss": 1.1718, | |
| "grad_norm": 1.3027160167694092, | |
| "learning_rate": 0.00016457901534949388, | |
| "epoch": 0.18, | |
| "step": 11800 | |
| }, | |
| { | |
| "loss": 1.2034, | |
| "grad_norm": 1.0722211599349976, | |
| "learning_rate": 0.00016450391997837254, | |
| "epoch": 0.18, | |
| "step": 11825 | |
| }, | |
| { | |
| "loss": 1.1761, | |
| "grad_norm": 2.5194356441497803, | |
| "learning_rate": 0.0001644288246072512, | |
| "epoch": 0.18, | |
| "step": 11850 | |
| }, | |
| { | |
| "loss": 1.213, | |
| "grad_norm": 1.5070539712905884, | |
| "learning_rate": 0.00016435372923612987, | |
| "epoch": 0.18, | |
| "step": 11875 | |
| }, | |
| { | |
| "loss": 1.1899, | |
| "grad_norm": 2.126924991607666, | |
| "learning_rate": 0.00016427863386500857, | |
| "epoch": 0.18, | |
| "step": 11900 | |
| }, | |
| { | |
| "loss": 1.2114, | |
| "grad_norm": 1.6474621295928955, | |
| "learning_rate": 0.00016420353849388726, | |
| "epoch": 0.18, | |
| "step": 11925 | |
| }, | |
| { | |
| "loss": 1.2885, | |
| "grad_norm": 1.4247677326202393, | |
| "learning_rate": 0.00016412844312276592, | |
| "epoch": 0.18, | |
| "step": 11950 | |
| }, | |
| { | |
| "loss": 1.1922, | |
| "grad_norm": 1.7299450635910034, | |
| "learning_rate": 0.00016405334775164462, | |
| "epoch": 0.18, | |
| "step": 11975 | |
| }, | |
| { | |
| "loss": 1.255, | |
| "grad_norm": 1.1741243600845337, | |
| "learning_rate": 0.00016397825238052328, | |
| "epoch": 0.18, | |
| "step": 12000 | |
| }, | |
| { | |
| "loss": 1.1358, | |
| "grad_norm": 3.301985502243042, | |
| "learning_rate": 0.00016390315700940195, | |
| "epoch": 0.18, | |
| "step": 12025 | |
| }, | |
| { | |
| "loss": 1.2252, | |
| "grad_norm": 1.730089783668518, | |
| "learning_rate": 0.0001638280616382806, | |
| "epoch": 0.18, | |
| "step": 12050 | |
| }, | |
| { | |
| "loss": 1.2015, | |
| "grad_norm": 0.9660411477088928, | |
| "learning_rate": 0.0001637529662671593, | |
| "epoch": 0.18, | |
| "step": 12075 | |
| }, | |
| { | |
| "loss": 1.1952, | |
| "grad_norm": 1.2349923849105835, | |
| "learning_rate": 0.00016367787089603797, | |
| "epoch": 0.18, | |
| "step": 12100 | |
| }, | |
| { | |
| "loss": 1.1539, | |
| "grad_norm": 1.5074914693832397, | |
| "learning_rate": 0.00016360277552491666, | |
| "epoch": 0.18, | |
| "step": 12125 | |
| }, | |
| { | |
| "loss": 1.2497, | |
| "grad_norm": 1.2159485816955566, | |
| "learning_rate": 0.00016352768015379533, | |
| "epoch": 0.18, | |
| "step": 12150 | |
| }, | |
| { | |
| "loss": 1.2624, | |
| "grad_norm": 1.7598042488098145, | |
| "learning_rate": 0.000163452584782674, | |
| "epoch": 0.18, | |
| "step": 12175 | |
| }, | |
| { | |
| "loss": 1.2243, | |
| "grad_norm": 1.2727563381195068, | |
| "learning_rate": 0.00016337748941155268, | |
| "epoch": 0.18, | |
| "step": 12200 | |
| }, | |
| { | |
| "loss": 1.2093, | |
| "grad_norm": 1.205769658088684, | |
| "learning_rate": 0.00016330239404043135, | |
| "epoch": 0.18, | |
| "step": 12225 | |
| }, | |
| { | |
| "loss": 1.1866, | |
| "grad_norm": 5.114007949829102, | |
| "learning_rate": 0.00016322729866931004, | |
| "epoch": 0.18, | |
| "step": 12250 | |
| }, | |
| { | |
| "loss": 1.2824, | |
| "grad_norm": 2.016160488128662, | |
| "learning_rate": 0.0001631522032981887, | |
| "epoch": 0.18, | |
| "step": 12275 | |
| }, | |
| { | |
| "loss": 1.1501, | |
| "grad_norm": 1.1405665874481201, | |
| "learning_rate": 0.00016307710792706737, | |
| "epoch": 0.18, | |
| "step": 12300 | |
| }, | |
| { | |
| "loss": 1.1795, | |
| "grad_norm": 2.2503092288970947, | |
| "learning_rate": 0.00016300201255594604, | |
| "epoch": 0.19, | |
| "step": 12325 | |
| }, | |
| { | |
| "loss": 1.2146, | |
| "grad_norm": 1.0414721965789795, | |
| "learning_rate": 0.00016292691718482473, | |
| "epoch": 0.19, | |
| "step": 12350 | |
| }, | |
| { | |
| "loss": 1.2338, | |
| "grad_norm": 1.7036515474319458, | |
| "learning_rate": 0.00016285182181370342, | |
| "epoch": 0.19, | |
| "step": 12375 | |
| }, | |
| { | |
| "loss": 1.2638, | |
| "grad_norm": 1.2566134929656982, | |
| "learning_rate": 0.0001627767264425821, | |
| "epoch": 0.19, | |
| "step": 12400 | |
| }, | |
| { | |
| "loss": 1.1694, | |
| "grad_norm": 1.4524366855621338, | |
| "learning_rate": 0.00016270163107146078, | |
| "epoch": 0.19, | |
| "step": 12425 | |
| }, | |
| { | |
| "loss": 1.1826, | |
| "grad_norm": 1.2547303438186646, | |
| "learning_rate": 0.00016262653570033945, | |
| "epoch": 0.19, | |
| "step": 12450 | |
| }, | |
| { | |
| "loss": 1.1885, | |
| "grad_norm": 3.284105062484741, | |
| "learning_rate": 0.0001625514403292181, | |
| "epoch": 0.19, | |
| "step": 12475 | |
| }, | |
| { | |
| "loss": 1.1785, | |
| "grad_norm": 1.14161217212677, | |
| "learning_rate": 0.00016247634495809678, | |
| "epoch": 0.19, | |
| "step": 12500 | |
| }, | |
| { | |
| "loss": 1.156, | |
| "grad_norm": 1.9379956722259521, | |
| "learning_rate": 0.00016240124958697547, | |
| "epoch": 0.19, | |
| "step": 12525 | |
| }, | |
| { | |
| "loss": 1.1911, | |
| "grad_norm": 1.1594531536102295, | |
| "learning_rate": 0.00016232615421585413, | |
| "epoch": 0.19, | |
| "step": 12550 | |
| }, | |
| { | |
| "loss": 1.1905, | |
| "grad_norm": 1.3584635257720947, | |
| "learning_rate": 0.00016225105884473283, | |
| "epoch": 0.19, | |
| "step": 12575 | |
| }, | |
| { | |
| "loss": 1.2023, | |
| "grad_norm": 1.402160406112671, | |
| "learning_rate": 0.0001621759634736115, | |
| "epoch": 0.19, | |
| "step": 12600 | |
| }, | |
| { | |
| "loss": 1.235, | |
| "grad_norm": 1.3611042499542236, | |
| "learning_rate": 0.00016210086810249016, | |
| "epoch": 0.19, | |
| "step": 12625 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 1.1458463668823242, | |
| "learning_rate": 0.00016202577273136885, | |
| "epoch": 0.19, | |
| "step": 12650 | |
| }, | |
| { | |
| "loss": 1.1829, | |
| "grad_norm": 0.8500710725784302, | |
| "learning_rate": 0.00016195067736024752, | |
| "epoch": 0.19, | |
| "step": 12675 | |
| }, | |
| { | |
| "loss": 1.1632, | |
| "grad_norm": 1.5723693370819092, | |
| "learning_rate": 0.0001618755819891262, | |
| "epoch": 0.19, | |
| "step": 12700 | |
| }, | |
| { | |
| "loss": 1.1982, | |
| "grad_norm": 1.399224042892456, | |
| "learning_rate": 0.00016180048661800487, | |
| "epoch": 0.19, | |
| "step": 12725 | |
| }, | |
| { | |
| "loss": 1.2511, | |
| "grad_norm": 2.703968048095703, | |
| "learning_rate": 0.00016172539124688354, | |
| "epoch": 0.19, | |
| "step": 12750 | |
| }, | |
| { | |
| "loss": 1.1905, | |
| "grad_norm": 1.6090725660324097, | |
| "learning_rate": 0.00016165029587576223, | |
| "epoch": 0.19, | |
| "step": 12775 | |
| }, | |
| { | |
| "loss": 1.2074, | |
| "grad_norm": 2.323432207107544, | |
| "learning_rate": 0.0001615752005046409, | |
| "epoch": 0.19, | |
| "step": 12800 | |
| }, | |
| { | |
| "loss": 1.2514, | |
| "grad_norm": 1.0441837310791016, | |
| "learning_rate": 0.0001615001051335196, | |
| "epoch": 0.19, | |
| "step": 12825 | |
| }, | |
| { | |
| "loss": 1.2018, | |
| "grad_norm": 1.3072987794876099, | |
| "learning_rate": 0.00016142500976239825, | |
| "epoch": 0.19, | |
| "step": 12850 | |
| }, | |
| { | |
| "loss": 1.169, | |
| "grad_norm": 2.1105244159698486, | |
| "learning_rate": 0.00016134991439127695, | |
| "epoch": 0.19, | |
| "step": 12875 | |
| }, | |
| { | |
| "loss": 1.2361, | |
| "grad_norm": 1.4109976291656494, | |
| "learning_rate": 0.0001612748190201556, | |
| "epoch": 0.19, | |
| "step": 12900 | |
| }, | |
| { | |
| "loss": 1.2543, | |
| "grad_norm": 1.5119200944900513, | |
| "learning_rate": 0.00016119972364903428, | |
| "epoch": 0.19, | |
| "step": 12925 | |
| }, | |
| { | |
| "loss": 1.2326, | |
| "grad_norm": 1.3456885814666748, | |
| "learning_rate": 0.00016112462827791294, | |
| "epoch": 0.19, | |
| "step": 12950 | |
| }, | |
| { | |
| "loss": 1.1761, | |
| "grad_norm": 2.7535812854766846, | |
| "learning_rate": 0.00016104953290679163, | |
| "epoch": 0.19, | |
| "step": 12975 | |
| }, | |
| { | |
| "loss": 1.1413, | |
| "grad_norm": 1.665337085723877, | |
| "learning_rate": 0.00016097443753567033, | |
| "epoch": 0.2, | |
| "step": 13000 | |
| }, | |
| { | |
| "loss": 1.1836, | |
| "grad_norm": 1.1174890995025635, | |
| "learning_rate": 0.000160899342164549, | |
| "epoch": 0.2, | |
| "step": 13025 | |
| }, | |
| { | |
| "loss": 1.1405, | |
| "grad_norm": 2.4042136669158936, | |
| "learning_rate": 0.00016082424679342766, | |
| "epoch": 0.2, | |
| "step": 13050 | |
| }, | |
| { | |
| "loss": 1.2339, | |
| "grad_norm": 2.3713090419769287, | |
| "learning_rate": 0.00016074915142230632, | |
| "epoch": 0.2, | |
| "step": 13075 | |
| }, | |
| { | |
| "loss": 1.2351, | |
| "grad_norm": 1.7716904878616333, | |
| "learning_rate": 0.00016067405605118502, | |
| "epoch": 0.2, | |
| "step": 13100 | |
| }, | |
| { | |
| "loss": 1.194, | |
| "grad_norm": 1.2277339696884155, | |
| "learning_rate": 0.00016059896068006368, | |
| "epoch": 0.2, | |
| "step": 13125 | |
| }, | |
| { | |
| "loss": 1.2429, | |
| "grad_norm": 1.2725192308425903, | |
| "learning_rate": 0.00016052386530894237, | |
| "epoch": 0.2, | |
| "step": 13150 | |
| }, | |
| { | |
| "loss": 1.1837, | |
| "grad_norm": 1.4028089046478271, | |
| "learning_rate": 0.00016044876993782104, | |
| "epoch": 0.2, | |
| "step": 13175 | |
| }, | |
| { | |
| "loss": 1.2633, | |
| "grad_norm": 3.1674065589904785, | |
| "learning_rate": 0.0001603736745666997, | |
| "epoch": 0.2, | |
| "step": 13200 | |
| }, | |
| { | |
| "loss": 1.2575, | |
| "grad_norm": 1.3717881441116333, | |
| "learning_rate": 0.0001602985791955784, | |
| "epoch": 0.2, | |
| "step": 13225 | |
| }, | |
| { | |
| "loss": 1.2385, | |
| "grad_norm": 1.5640596151351929, | |
| "learning_rate": 0.00016022348382445706, | |
| "epoch": 0.2, | |
| "step": 13250 | |
| }, | |
| { | |
| "loss": 1.231, | |
| "grad_norm": 1.336003303527832, | |
| "learning_rate": 0.00016014838845333575, | |
| "epoch": 0.2, | |
| "step": 13275 | |
| }, | |
| { | |
| "loss": 1.1247, | |
| "grad_norm": 1.0398321151733398, | |
| "learning_rate": 0.00016007329308221442, | |
| "epoch": 0.2, | |
| "step": 13300 | |
| }, | |
| { | |
| "loss": 1.1848, | |
| "grad_norm": 1.5215067863464355, | |
| "learning_rate": 0.0001599981977110931, | |
| "epoch": 0.2, | |
| "step": 13325 | |
| }, | |
| { | |
| "loss": 1.2053, | |
| "grad_norm": 1.184665560722351, | |
| "learning_rate": 0.00015992310233997178, | |
| "epoch": 0.2, | |
| "step": 13350 | |
| }, | |
| { | |
| "loss": 1.2213, | |
| "grad_norm": 3.2756311893463135, | |
| "learning_rate": 0.00015984800696885044, | |
| "epoch": 0.2, | |
| "step": 13375 | |
| }, | |
| { | |
| "loss": 1.1623, | |
| "grad_norm": 2.2092206478118896, | |
| "learning_rate": 0.0001597729115977291, | |
| "epoch": 0.2, | |
| "step": 13400 | |
| }, | |
| { | |
| "loss": 1.1939, | |
| "grad_norm": 1.701504111289978, | |
| "learning_rate": 0.0001596978162266078, | |
| "epoch": 0.2, | |
| "step": 13425 | |
| }, | |
| { | |
| "loss": 1.194, | |
| "grad_norm": 1.0575650930404663, | |
| "learning_rate": 0.0001596227208554865, | |
| "epoch": 0.2, | |
| "step": 13450 | |
| }, | |
| { | |
| "loss": 1.1561, | |
| "grad_norm": 2.7198948860168457, | |
| "learning_rate": 0.00015954762548436516, | |
| "epoch": 0.2, | |
| "step": 13475 | |
| }, | |
| { | |
| "loss": 1.1449, | |
| "grad_norm": 1.2031759023666382, | |
| "learning_rate": 0.00015947253011324382, | |
| "epoch": 0.2, | |
| "step": 13500 | |
| }, | |
| { | |
| "loss": 1.195, | |
| "grad_norm": 1.3267816305160522, | |
| "learning_rate": 0.0001593974347421225, | |
| "epoch": 0.2, | |
| "step": 13525 | |
| }, | |
| { | |
| "loss": 1.1566, | |
| "grad_norm": 1.4941660165786743, | |
| "learning_rate": 0.00015932233937100118, | |
| "epoch": 0.2, | |
| "step": 13550 | |
| }, | |
| { | |
| "loss": 1.1218, | |
| "grad_norm": 0.9819481372833252, | |
| "learning_rate": 0.00015924724399987985, | |
| "epoch": 0.2, | |
| "step": 13575 | |
| }, | |
| { | |
| "loss": 1.2457, | |
| "grad_norm": 1.1329920291900635, | |
| "learning_rate": 0.00015917214862875854, | |
| "epoch": 0.2, | |
| "step": 13600 | |
| }, | |
| { | |
| "loss": 1.2218, | |
| "grad_norm": 1.0208684206008911, | |
| "learning_rate": 0.0001590970532576372, | |
| "epoch": 0.2, | |
| "step": 13625 | |
| }, | |
| { | |
| "loss": 1.3095, | |
| "grad_norm": 1.9692599773406982, | |
| "learning_rate": 0.00015902195788651587, | |
| "epoch": 0.21, | |
| "step": 13650 | |
| }, | |
| { | |
| "loss": 1.1426, | |
| "grad_norm": 1.1488243341445923, | |
| "learning_rate": 0.00015894686251539456, | |
| "epoch": 0.21, | |
| "step": 13675 | |
| }, | |
| { | |
| "loss": 1.1786, | |
| "grad_norm": 2.137523651123047, | |
| "learning_rate": 0.00015887176714427323, | |
| "epoch": 0.21, | |
| "step": 13700 | |
| }, | |
| { | |
| "loss": 1.2123, | |
| "grad_norm": 1.74925696849823, | |
| "learning_rate": 0.00015879667177315192, | |
| "epoch": 0.21, | |
| "step": 13725 | |
| }, | |
| { | |
| "loss": 1.2237, | |
| "grad_norm": 1.931201457977295, | |
| "learning_rate": 0.00015872157640203058, | |
| "epoch": 0.21, | |
| "step": 13750 | |
| }, | |
| { | |
| "loss": 1.1822, | |
| "grad_norm": 1.3742233514785767, | |
| "learning_rate": 0.00015864648103090928, | |
| "epoch": 0.21, | |
| "step": 13775 | |
| }, | |
| { | |
| "loss": 1.2393, | |
| "grad_norm": 1.860449194908142, | |
| "learning_rate": 0.00015857138565978794, | |
| "epoch": 0.21, | |
| "step": 13800 | |
| }, | |
| { | |
| "loss": 1.16, | |
| "grad_norm": 2.664776086807251, | |
| "learning_rate": 0.0001584962902886666, | |
| "epoch": 0.21, | |
| "step": 13825 | |
| }, | |
| { | |
| "loss": 1.172, | |
| "grad_norm": 2.5164761543273926, | |
| "learning_rate": 0.00015842119491754527, | |
| "epoch": 0.21, | |
| "step": 13850 | |
| }, | |
| { | |
| "loss": 1.1531, | |
| "grad_norm": 1.644278645515442, | |
| "learning_rate": 0.00015834609954642397, | |
| "epoch": 0.21, | |
| "step": 13875 | |
| }, | |
| { | |
| "loss": 1.2801, | |
| "grad_norm": 1.2100858688354492, | |
| "learning_rate": 0.00015827100417530266, | |
| "epoch": 0.21, | |
| "step": 13900 | |
| }, | |
| { | |
| "loss": 1.2011, | |
| "grad_norm": 1.9542933702468872, | |
| "learning_rate": 0.00015819590880418132, | |
| "epoch": 0.21, | |
| "step": 13925 | |
| }, | |
| { | |
| "loss": 1.2344, | |
| "grad_norm": 1.1991852521896362, | |
| "learning_rate": 0.00015812081343306, | |
| "epoch": 0.21, | |
| "step": 13950 | |
| }, | |
| { | |
| "loss": 1.1884, | |
| "grad_norm": 1.9113025665283203, | |
| "learning_rate": 0.00015804571806193865, | |
| "epoch": 0.21, | |
| "step": 13975 | |
| }, | |
| { | |
| "loss": 1.242, | |
| "grad_norm": 1.4621787071228027, | |
| "learning_rate": 0.00015797062269081735, | |
| "epoch": 0.21, | |
| "step": 14000 | |
| }, | |
| { | |
| "loss": 1.1961, | |
| "grad_norm": 1.9302442073822021, | |
| "learning_rate": 0.000157895527319696, | |
| "epoch": 0.21, | |
| "step": 14025 | |
| }, | |
| { | |
| "loss": 1.2159, | |
| "grad_norm": 1.3267945051193237, | |
| "learning_rate": 0.0001578204319485747, | |
| "epoch": 0.21, | |
| "step": 14050 | |
| }, | |
| { | |
| "loss": 1.1573, | |
| "grad_norm": 1.2569104433059692, | |
| "learning_rate": 0.0001577453365774534, | |
| "epoch": 0.21, | |
| "step": 14075 | |
| }, | |
| { | |
| "loss": 1.2149, | |
| "grad_norm": 1.3353804349899292, | |
| "learning_rate": 0.00015767024120633203, | |
| "epoch": 0.21, | |
| "step": 14100 | |
| }, | |
| { | |
| "loss": 1.198, | |
| "grad_norm": 1.9309898614883423, | |
| "learning_rate": 0.00015759514583521073, | |
| "epoch": 0.21, | |
| "step": 14125 | |
| }, | |
| { | |
| "loss": 1.1742, | |
| "grad_norm": 1.2149921655654907, | |
| "learning_rate": 0.0001575200504640894, | |
| "epoch": 0.21, | |
| "step": 14150 | |
| }, | |
| { | |
| "loss": 1.1855, | |
| "grad_norm": 1.9573317766189575, | |
| "learning_rate": 0.00015744495509296808, | |
| "epoch": 0.21, | |
| "step": 14175 | |
| }, | |
| { | |
| "loss": 1.2459, | |
| "grad_norm": 1.384567379951477, | |
| "learning_rate": 0.00015736985972184675, | |
| "epoch": 0.21, | |
| "step": 14200 | |
| }, | |
| { | |
| "loss": 1.1853, | |
| "grad_norm": 1.7285842895507812, | |
| "learning_rate": 0.00015729476435072544, | |
| "epoch": 0.21, | |
| "step": 14225 | |
| }, | |
| { | |
| "loss": 1.1728, | |
| "grad_norm": 2.050541877746582, | |
| "learning_rate": 0.0001572196689796041, | |
| "epoch": 0.21, | |
| "step": 14250 | |
| }, | |
| { | |
| "loss": 1.2248, | |
| "grad_norm": 1.735643744468689, | |
| "learning_rate": 0.00015714457360848277, | |
| "epoch": 0.21, | |
| "step": 14275 | |
| }, | |
| { | |
| "loss": 1.1792, | |
| "grad_norm": 1.511836290359497, | |
| "learning_rate": 0.00015706947823736147, | |
| "epoch": 0.21, | |
| "step": 14300 | |
| }, | |
| { | |
| "loss": 1.1978, | |
| "grad_norm": 1.1453663110733032, | |
| "learning_rate": 0.00015699438286624013, | |
| "epoch": 0.22, | |
| "step": 14325 | |
| }, | |
| { | |
| "loss": 1.1747, | |
| "grad_norm": 1.8787868022918701, | |
| "learning_rate": 0.00015691928749511882, | |
| "epoch": 0.22, | |
| "step": 14350 | |
| }, | |
| { | |
| "loss": 1.1946, | |
| "grad_norm": 2.0433459281921387, | |
| "learning_rate": 0.0001568441921239975, | |
| "epoch": 0.22, | |
| "step": 14375 | |
| }, | |
| { | |
| "loss": 1.1676, | |
| "grad_norm": 1.6258106231689453, | |
| "learning_rate": 0.00015676909675287615, | |
| "epoch": 0.22, | |
| "step": 14400 | |
| }, | |
| { | |
| "loss": 1.1486, | |
| "grad_norm": 1.0429004430770874, | |
| "learning_rate": 0.00015669400138175482, | |
| "epoch": 0.22, | |
| "step": 14425 | |
| }, | |
| { | |
| "loss": 1.2211, | |
| "grad_norm": 1.5074403285980225, | |
| "learning_rate": 0.0001566189060106335, | |
| "epoch": 0.22, | |
| "step": 14450 | |
| }, | |
| { | |
| "loss": 1.2161, | |
| "grad_norm": 1.4326659440994263, | |
| "learning_rate": 0.00015654381063951218, | |
| "epoch": 0.22, | |
| "step": 14475 | |
| }, | |
| { | |
| "loss": 1.2538, | |
| "grad_norm": 1.8539921045303345, | |
| "learning_rate": 0.00015646871526839087, | |
| "epoch": 0.22, | |
| "step": 14500 | |
| }, | |
| { | |
| "loss": 1.1929, | |
| "grad_norm": 1.7635362148284912, | |
| "learning_rate": 0.00015639361989726956, | |
| "epoch": 0.22, | |
| "step": 14525 | |
| }, | |
| { | |
| "loss": 1.2115, | |
| "grad_norm": 1.3895171880722046, | |
| "learning_rate": 0.00015631852452614823, | |
| "epoch": 0.22, | |
| "step": 14550 | |
| }, | |
| { | |
| "loss": 1.1723, | |
| "grad_norm": 1.5900187492370605, | |
| "learning_rate": 0.0001562434291550269, | |
| "epoch": 0.22, | |
| "step": 14575 | |
| }, | |
| { | |
| "loss": 1.186, | |
| "grad_norm": 1.7074415683746338, | |
| "learning_rate": 0.00015616833378390556, | |
| "epoch": 0.22, | |
| "step": 14600 | |
| }, | |
| { | |
| "loss": 1.187, | |
| "grad_norm": 1.3961682319641113, | |
| "learning_rate": 0.00015609323841278425, | |
| "epoch": 0.22, | |
| "step": 14625 | |
| }, | |
| { | |
| "loss": 1.1981, | |
| "grad_norm": 1.4976271390914917, | |
| "learning_rate": 0.00015601814304166292, | |
| "epoch": 0.22, | |
| "step": 14650 | |
| }, | |
| { | |
| "loss": 1.1749, | |
| "grad_norm": 1.286617398262024, | |
| "learning_rate": 0.0001559430476705416, | |
| "epoch": 0.22, | |
| "step": 14675 | |
| }, | |
| { | |
| "loss": 1.1506, | |
| "grad_norm": 1.8841774463653564, | |
| "learning_rate": 0.00015586795229942027, | |
| "epoch": 0.22, | |
| "step": 14700 | |
| }, | |
| { | |
| "loss": 1.1846, | |
| "grad_norm": 2.3921959400177, | |
| "learning_rate": 0.00015579285692829894, | |
| "epoch": 0.22, | |
| "step": 14725 | |
| }, | |
| { | |
| "loss": 1.1553, | |
| "grad_norm": 1.139286756515503, | |
| "learning_rate": 0.00015571776155717763, | |
| "epoch": 0.22, | |
| "step": 14750 | |
| }, | |
| { | |
| "loss": 1.213, | |
| "grad_norm": 1.5389468669891357, | |
| "learning_rate": 0.0001556426661860563, | |
| "epoch": 0.22, | |
| "step": 14775 | |
| }, | |
| { | |
| "loss": 1.2504, | |
| "grad_norm": 1.1002377271652222, | |
| "learning_rate": 0.000155567570814935, | |
| "epoch": 0.22, | |
| "step": 14800 | |
| }, | |
| { | |
| "loss": 1.2369, | |
| "grad_norm": 1.2907332181930542, | |
| "learning_rate": 0.00015549247544381365, | |
| "epoch": 0.22, | |
| "step": 14825 | |
| }, | |
| { | |
| "loss": 1.2327, | |
| "grad_norm": 2.8189125061035156, | |
| "learning_rate": 0.00015541738007269232, | |
| "epoch": 0.22, | |
| "step": 14850 | |
| }, | |
| { | |
| "loss": 1.2142, | |
| "grad_norm": 1.4760026931762695, | |
| "learning_rate": 0.00015534228470157098, | |
| "epoch": 0.22, | |
| "step": 14875 | |
| }, | |
| { | |
| "loss": 1.2538, | |
| "grad_norm": 1.4497836828231812, | |
| "learning_rate": 0.00015526718933044968, | |
| "epoch": 0.22, | |
| "step": 14900 | |
| }, | |
| { | |
| "loss": 1.2757, | |
| "grad_norm": 1.2099194526672363, | |
| "learning_rate": 0.00015519209395932834, | |
| "epoch": 0.22, | |
| "step": 14925 | |
| }, | |
| { | |
| "loss": 1.2636, | |
| "grad_norm": 1.2008768320083618, | |
| "learning_rate": 0.00015511699858820703, | |
| "epoch": 0.22, | |
| "step": 14950 | |
| }, | |
| { | |
| "loss": 1.165, | |
| "grad_norm": 4.421905040740967, | |
| "learning_rate": 0.00015504190321708573, | |
| "epoch": 0.22, | |
| "step": 14975 | |
| }, | |
| { | |
| "loss": 1.164, | |
| "grad_norm": 1.2725390195846558, | |
| "learning_rate": 0.0001549668078459644, | |
| "epoch": 0.23, | |
| "step": 15000 | |
| }, | |
| { | |
| "loss": 1.2026, | |
| "grad_norm": 2.9403913021087646, | |
| "learning_rate": 0.00015489171247484306, | |
| "epoch": 0.23, | |
| "step": 15025 | |
| }, | |
| { | |
| "loss": 1.1938, | |
| "grad_norm": 1.8553730249404907, | |
| "learning_rate": 0.00015481661710372172, | |
| "epoch": 0.23, | |
| "step": 15050 | |
| }, | |
| { | |
| "loss": 1.1879, | |
| "grad_norm": 1.242799162864685, | |
| "learning_rate": 0.00015474152173260042, | |
| "epoch": 0.23, | |
| "step": 15075 | |
| }, | |
| { | |
| "loss": 1.1512, | |
| "grad_norm": 1.5785107612609863, | |
| "learning_rate": 0.00015466642636147908, | |
| "epoch": 0.23, | |
| "step": 15100 | |
| }, | |
| { | |
| "loss": 1.1802, | |
| "grad_norm": 2.665036916732788, | |
| "learning_rate": 0.00015459133099035777, | |
| "epoch": 0.23, | |
| "step": 15125 | |
| }, | |
| { | |
| "loss": 1.1603, | |
| "grad_norm": 1.8509407043457031, | |
| "learning_rate": 0.00015451623561923644, | |
| "epoch": 0.23, | |
| "step": 15150 | |
| }, | |
| { | |
| "loss": 1.1711, | |
| "grad_norm": 1.2315629720687866, | |
| "learning_rate": 0.0001544411402481151, | |
| "epoch": 0.23, | |
| "step": 15175 | |
| }, | |
| { | |
| "loss": 1.1784, | |
| "grad_norm": 1.6980071067810059, | |
| "learning_rate": 0.0001543660448769938, | |
| "epoch": 0.23, | |
| "step": 15200 | |
| }, | |
| { | |
| "loss": 1.2922, | |
| "grad_norm": 1.1929773092269897, | |
| "learning_rate": 0.00015429094950587246, | |
| "epoch": 0.23, | |
| "step": 15225 | |
| }, | |
| { | |
| "loss": 1.1272, | |
| "grad_norm": 1.722090244293213, | |
| "learning_rate": 0.00015421585413475115, | |
| "epoch": 0.23, | |
| "step": 15250 | |
| }, | |
| { | |
| "loss": 1.2982, | |
| "grad_norm": 1.712141990661621, | |
| "learning_rate": 0.00015414075876362982, | |
| "epoch": 0.23, | |
| "step": 15275 | |
| }, | |
| { | |
| "loss": 1.237, | |
| "grad_norm": 2.6743271350860596, | |
| "learning_rate": 0.00015406566339250848, | |
| "epoch": 0.23, | |
| "step": 15300 | |
| }, | |
| { | |
| "loss": 1.1982, | |
| "grad_norm": 1.842942714691162, | |
| "learning_rate": 0.00015399056802138715, | |
| "epoch": 0.23, | |
| "step": 15325 | |
| }, | |
| { | |
| "loss": 1.2102, | |
| "grad_norm": 1.8020812273025513, | |
| "learning_rate": 0.00015391547265026584, | |
| "epoch": 0.23, | |
| "step": 15350 | |
| }, | |
| { | |
| "loss": 1.2009, | |
| "grad_norm": 1.4913078546524048, | |
| "learning_rate": 0.00015384037727914453, | |
| "epoch": 0.23, | |
| "step": 15375 | |
| }, | |
| { | |
| "loss": 1.2133, | |
| "grad_norm": 1.1852643489837646, | |
| "learning_rate": 0.0001537652819080232, | |
| "epoch": 0.23, | |
| "step": 15400 | |
| }, | |
| { | |
| "loss": 1.2375, | |
| "grad_norm": 1.9560911655426025, | |
| "learning_rate": 0.0001536901865369019, | |
| "epoch": 0.23, | |
| "step": 15425 | |
| }, | |
| { | |
| "loss": 1.2484, | |
| "grad_norm": 1.743415355682373, | |
| "learning_rate": 0.00015361509116578056, | |
| "epoch": 0.23, | |
| "step": 15450 | |
| }, | |
| { | |
| "loss": 1.1939, | |
| "grad_norm": 2.6720640659332275, | |
| "learning_rate": 0.00015353999579465922, | |
| "epoch": 0.23, | |
| "step": 15475 | |
| }, | |
| { | |
| "loss": 1.2031, | |
| "grad_norm": 1.5238986015319824, | |
| "learning_rate": 0.0001534649004235379, | |
| "epoch": 0.23, | |
| "step": 15500 | |
| }, | |
| { | |
| "loss": 1.2155, | |
| "grad_norm": 1.7103843688964844, | |
| "learning_rate": 0.00015338980505241658, | |
| "epoch": 0.23, | |
| "step": 15525 | |
| }, | |
| { | |
| "loss": 1.2001, | |
| "grad_norm": 1.6735540628433228, | |
| "learning_rate": 0.00015331470968129525, | |
| "epoch": 0.23, | |
| "step": 15550 | |
| }, | |
| { | |
| "loss": 1.1737, | |
| "grad_norm": 1.4866646528244019, | |
| "learning_rate": 0.00015323961431017394, | |
| "epoch": 0.23, | |
| "step": 15575 | |
| }, | |
| { | |
| "loss": 1.2778, | |
| "grad_norm": 1.4038907289505005, | |
| "learning_rate": 0.0001531645189390526, | |
| "epoch": 0.23, | |
| "step": 15600 | |
| }, | |
| { | |
| "loss": 1.1966, | |
| "grad_norm": 2.238800048828125, | |
| "learning_rate": 0.00015308942356793127, | |
| "epoch": 0.23, | |
| "step": 15625 | |
| }, | |
| { | |
| "loss": 1.2119, | |
| "grad_norm": 1.6463327407836914, | |
| "learning_rate": 0.00015301432819680996, | |
| "epoch": 0.24, | |
| "step": 15650 | |
| }, | |
| { | |
| "loss": 1.2049, | |
| "grad_norm": 1.1655962467193604, | |
| "learning_rate": 0.00015293923282568863, | |
| "epoch": 0.24, | |
| "step": 15675 | |
| }, | |
| { | |
| "loss": 1.1357, | |
| "grad_norm": 1.2663848400115967, | |
| "learning_rate": 0.00015286413745456732, | |
| "epoch": 0.24, | |
| "step": 15700 | |
| }, | |
| { | |
| "loss": 1.2133, | |
| "grad_norm": 1.140039324760437, | |
| "learning_rate": 0.00015278904208344598, | |
| "epoch": 0.24, | |
| "step": 15725 | |
| }, | |
| { | |
| "loss": 1.174, | |
| "grad_norm": 2.119586944580078, | |
| "learning_rate": 0.00015271394671232465, | |
| "epoch": 0.24, | |
| "step": 15750 | |
| }, | |
| { | |
| "loss": 1.2107, | |
| "grad_norm": 1.7722172737121582, | |
| "learning_rate": 0.00015263885134120332, | |
| "epoch": 0.24, | |
| "step": 15775 | |
| }, | |
| { | |
| "loss": 1.2139, | |
| "grad_norm": 1.7310364246368408, | |
| "learning_rate": 0.000152563755970082, | |
| "epoch": 0.24, | |
| "step": 15800 | |
| }, | |
| { | |
| "loss": 1.126, | |
| "grad_norm": 0.9670734405517578, | |
| "learning_rate": 0.0001524886605989607, | |
| "epoch": 0.24, | |
| "step": 15825 | |
| }, | |
| { | |
| "loss": 1.2521, | |
| "grad_norm": 2.075798511505127, | |
| "learning_rate": 0.00015241356522783937, | |
| "epoch": 0.24, | |
| "step": 15850 | |
| }, | |
| { | |
| "loss": 1.2131, | |
| "grad_norm": 1.7291430234909058, | |
| "learning_rate": 0.00015233846985671806, | |
| "epoch": 0.24, | |
| "step": 15875 | |
| }, | |
| { | |
| "loss": 1.2042, | |
| "grad_norm": 2.976837635040283, | |
| "learning_rate": 0.00015226337448559672, | |
| "epoch": 0.24, | |
| "step": 15900 | |
| }, | |
| { | |
| "loss": 1.2391, | |
| "grad_norm": 1.3992162942886353, | |
| "learning_rate": 0.0001521882791144754, | |
| "epoch": 0.24, | |
| "step": 15925 | |
| }, | |
| { | |
| "loss": 1.1702, | |
| "grad_norm": 0.8179588317871094, | |
| "learning_rate": 0.00015211618755819892, | |
| "epoch": 0.24, | |
| "step": 15950 | |
| }, | |
| { | |
| "loss": 1.1432, | |
| "grad_norm": 1.6531869173049927, | |
| "learning_rate": 0.0001520410921870776, | |
| "epoch": 0.24, | |
| "step": 15975 | |
| }, | |
| { | |
| "loss": 1.1531, | |
| "grad_norm": 2.893293857574463, | |
| "learning_rate": 0.00015196599681595627, | |
| "epoch": 0.24, | |
| "step": 16000 | |
| }, | |
| { | |
| "loss": 1.1839, | |
| "grad_norm": 1.686982274055481, | |
| "learning_rate": 0.00015189090144483497, | |
| "epoch": 0.24, | |
| "step": 16025 | |
| }, | |
| { | |
| "loss": 1.1529, | |
| "grad_norm": 1.0813180208206177, | |
| "learning_rate": 0.0001518158060737136, | |
| "epoch": 0.24, | |
| "step": 16050 | |
| }, | |
| { | |
| "loss": 1.2914, | |
| "grad_norm": 1.8390347957611084, | |
| "learning_rate": 0.0001517407107025923, | |
| "epoch": 0.24, | |
| "step": 16075 | |
| }, | |
| { | |
| "loss": 1.2263, | |
| "grad_norm": 1.4947305917739868, | |
| "learning_rate": 0.00015166561533147096, | |
| "epoch": 0.24, | |
| "step": 16100 | |
| }, | |
| { | |
| "loss": 1.2098, | |
| "grad_norm": 1.0743931531906128, | |
| "learning_rate": 0.00015159051996034966, | |
| "epoch": 0.24, | |
| "step": 16125 | |
| }, | |
| { | |
| "loss": 1.1824, | |
| "grad_norm": 1.6704978942871094, | |
| "learning_rate": 0.00015151542458922832, | |
| "epoch": 0.24, | |
| "step": 16150 | |
| }, | |
| { | |
| "loss": 1.1727, | |
| "grad_norm": 1.23310387134552, | |
| "learning_rate": 0.000151440329218107, | |
| "epoch": 0.24, | |
| "step": 16175 | |
| }, | |
| { | |
| "loss": 1.1947, | |
| "grad_norm": 1.678554892539978, | |
| "learning_rate": 0.00015136523384698568, | |
| "epoch": 0.24, | |
| "step": 16200 | |
| }, | |
| { | |
| "loss": 1.2033, | |
| "grad_norm": 1.4678512811660767, | |
| "learning_rate": 0.00015129013847586434, | |
| "epoch": 0.24, | |
| "step": 16225 | |
| }, | |
| { | |
| "loss": 1.2855, | |
| "grad_norm": 2.2149295806884766, | |
| "learning_rate": 0.00015121504310474304, | |
| "epoch": 0.24, | |
| "step": 16250 | |
| }, | |
| { | |
| "loss": 1.1601, | |
| "grad_norm": 0.9399513006210327, | |
| "learning_rate": 0.0001511399477336217, | |
| "epoch": 0.24, | |
| "step": 16275 | |
| }, | |
| { | |
| "loss": 1.1618, | |
| "grad_norm": 1.5738555192947388, | |
| "learning_rate": 0.0001510648523625004, | |
| "epoch": 0.24, | |
| "step": 16300 | |
| }, | |
| { | |
| "loss": 1.1984, | |
| "grad_norm": 2.3447060585021973, | |
| "learning_rate": 0.00015098975699137906, | |
| "epoch": 0.25, | |
| "step": 16325 | |
| }, | |
| { | |
| "loss": 1.2129, | |
| "grad_norm": 2.5573129653930664, | |
| "learning_rate": 0.00015091466162025772, | |
| "epoch": 0.25, | |
| "step": 16350 | |
| }, | |
| { | |
| "loss": 1.2152, | |
| "grad_norm": 1.161568284034729, | |
| "learning_rate": 0.00015083956624913642, | |
| "epoch": 0.25, | |
| "step": 16375 | |
| }, | |
| { | |
| "loss": 1.1788, | |
| "grad_norm": 1.2641152143478394, | |
| "learning_rate": 0.00015076447087801508, | |
| "epoch": 0.25, | |
| "step": 16400 | |
| }, | |
| { | |
| "loss": 1.2635, | |
| "grad_norm": 1.1497838497161865, | |
| "learning_rate": 0.00015068937550689377, | |
| "epoch": 0.25, | |
| "step": 16425 | |
| }, | |
| { | |
| "loss": 1.2427, | |
| "grad_norm": 1.777820110321045, | |
| "learning_rate": 0.00015061428013577244, | |
| "epoch": 0.25, | |
| "step": 16450 | |
| }, | |
| { | |
| "loss": 1.2167, | |
| "grad_norm": 1.704571008682251, | |
| "learning_rate": 0.00015053918476465113, | |
| "epoch": 0.25, | |
| "step": 16475 | |
| }, | |
| { | |
| "loss": 1.1348, | |
| "grad_norm": 1.2531949281692505, | |
| "learning_rate": 0.00015046408939352977, | |
| "epoch": 0.25, | |
| "step": 16500 | |
| }, | |
| { | |
| "loss": 1.2118, | |
| "grad_norm": 2.0152504444122314, | |
| "learning_rate": 0.00015038899402240846, | |
| "epoch": 0.25, | |
| "step": 16525 | |
| }, | |
| { | |
| "loss": 1.2169, | |
| "grad_norm": 1.327596664428711, | |
| "learning_rate": 0.00015031389865128713, | |
| "epoch": 0.25, | |
| "step": 16550 | |
| }, | |
| { | |
| "loss": 1.1057, | |
| "grad_norm": 2.2122318744659424, | |
| "learning_rate": 0.00015023880328016582, | |
| "epoch": 0.25, | |
| "step": 16575 | |
| }, | |
| { | |
| "loss": 1.1939, | |
| "grad_norm": 1.4037036895751953, | |
| "learning_rate": 0.0001501637079090445, | |
| "epoch": 0.25, | |
| "step": 16600 | |
| }, | |
| { | |
| "loss": 1.1178, | |
| "grad_norm": 1.947090983390808, | |
| "learning_rate": 0.00015008861253792318, | |
| "epoch": 0.25, | |
| "step": 16625 | |
| }, | |
| { | |
| "loss": 1.2499, | |
| "grad_norm": 1.9275078773498535, | |
| "learning_rate": 0.00015001351716680184, | |
| "epoch": 0.25, | |
| "step": 16650 | |
| }, | |
| { | |
| "loss": 1.203, | |
| "grad_norm": 1.6140542030334473, | |
| "learning_rate": 0.0001499384217956805, | |
| "epoch": 0.25, | |
| "step": 16675 | |
| }, | |
| { | |
| "loss": 1.1617, | |
| "grad_norm": 1.370875358581543, | |
| "learning_rate": 0.0001498633264245592, | |
| "epoch": 0.25, | |
| "step": 16700 | |
| }, | |
| { | |
| "loss": 1.1351, | |
| "grad_norm": 2.523732900619507, | |
| "learning_rate": 0.00014978823105343787, | |
| "epoch": 0.25, | |
| "step": 16725 | |
| }, | |
| { | |
| "loss": 1.1793, | |
| "grad_norm": 1.3012944459915161, | |
| "learning_rate": 0.00014971313568231656, | |
| "epoch": 0.25, | |
| "step": 16750 | |
| }, | |
| { | |
| "loss": 1.1834, | |
| "grad_norm": 1.382142424583435, | |
| "learning_rate": 0.00014963804031119522, | |
| "epoch": 0.25, | |
| "step": 16775 | |
| }, | |
| { | |
| "loss": 1.1841, | |
| "grad_norm": 3.1386773586273193, | |
| "learning_rate": 0.0001495629449400739, | |
| "epoch": 0.25, | |
| "step": 16800 | |
| }, | |
| { | |
| "loss": 1.1846, | |
| "grad_norm": 1.6328222751617432, | |
| "learning_rate": 0.00014948784956895258, | |
| "epoch": 0.25, | |
| "step": 16825 | |
| }, | |
| { | |
| "loss": 1.1879, | |
| "grad_norm": 1.3339941501617432, | |
| "learning_rate": 0.00014941275419783125, | |
| "epoch": 0.25, | |
| "step": 16850 | |
| }, | |
| { | |
| "loss": 1.1679, | |
| "grad_norm": 2.250485897064209, | |
| "learning_rate": 0.00014933765882670994, | |
| "epoch": 0.25, | |
| "step": 16875 | |
| }, | |
| { | |
| "loss": 1.1362, | |
| "grad_norm": 2.045668363571167, | |
| "learning_rate": 0.0001492625634555886, | |
| "epoch": 0.25, | |
| "step": 16900 | |
| }, | |
| { | |
| "loss": 1.1654, | |
| "grad_norm": 1.1913504600524902, | |
| "learning_rate": 0.0001491874680844673, | |
| "epoch": 0.25, | |
| "step": 16925 | |
| }, | |
| { | |
| "loss": 1.2208, | |
| "grad_norm": 1.6065621376037598, | |
| "learning_rate": 0.00014911237271334594, | |
| "epoch": 0.25, | |
| "step": 16950 | |
| }, | |
| { | |
| "loss": 1.1542, | |
| "grad_norm": 1.5805847644805908, | |
| "learning_rate": 0.00014903727734222463, | |
| "epoch": 0.25, | |
| "step": 16975 | |
| }, | |
| { | |
| "loss": 1.2015, | |
| "grad_norm": 5.944768905639648, | |
| "learning_rate": 0.0001489621819711033, | |
| "epoch": 0.26, | |
| "step": 17000 | |
| }, | |
| { | |
| "loss": 1.1992, | |
| "grad_norm": 3.976229667663574, | |
| "learning_rate": 0.00014888708659998199, | |
| "epoch": 0.26, | |
| "step": 17025 | |
| }, | |
| { | |
| "loss": 1.1746, | |
| "grad_norm": 2.31911301612854, | |
| "learning_rate": 0.00014881199122886068, | |
| "epoch": 0.26, | |
| "step": 17050 | |
| }, | |
| { | |
| "loss": 1.1205, | |
| "grad_norm": 1.8674370050430298, | |
| "learning_rate": 0.00014873689585773934, | |
| "epoch": 0.26, | |
| "step": 17075 | |
| }, | |
| { | |
| "loss": 1.2318, | |
| "grad_norm": 1.6549973487854004, | |
| "learning_rate": 0.000148661800486618, | |
| "epoch": 0.26, | |
| "step": 17100 | |
| }, | |
| { | |
| "loss": 1.2004, | |
| "grad_norm": 1.3497843742370605, | |
| "learning_rate": 0.00014858670511549667, | |
| "epoch": 0.26, | |
| "step": 17125 | |
| }, | |
| { | |
| "loss": 1.2421, | |
| "grad_norm": 1.8397778272628784, | |
| "learning_rate": 0.00014851160974437537, | |
| "epoch": 0.26, | |
| "step": 17150 | |
| }, | |
| { | |
| "loss": 1.1316, | |
| "grad_norm": 0.9151533842086792, | |
| "learning_rate": 0.00014843651437325403, | |
| "epoch": 0.26, | |
| "step": 17175 | |
| }, | |
| { | |
| "loss": 1.1847, | |
| "grad_norm": 1.389743447303772, | |
| "learning_rate": 0.00014836141900213272, | |
| "epoch": 0.26, | |
| "step": 17200 | |
| }, | |
| { | |
| "loss": 1.1785, | |
| "grad_norm": 0.9278027415275574, | |
| "learning_rate": 0.0001482863236310114, | |
| "epoch": 0.26, | |
| "step": 17225 | |
| }, | |
| { | |
| "loss": 1.1768, | |
| "grad_norm": 1.018211841583252, | |
| "learning_rate": 0.00014821122825989006, | |
| "epoch": 0.26, | |
| "step": 17250 | |
| }, | |
| { | |
| "loss": 1.1549, | |
| "grad_norm": 1.9112569093704224, | |
| "learning_rate": 0.00014813613288876875, | |
| "epoch": 0.26, | |
| "step": 17275 | |
| }, | |
| { | |
| "loss": 1.1876, | |
| "grad_norm": 1.2178176641464233, | |
| "learning_rate": 0.0001480610375176474, | |
| "epoch": 0.26, | |
| "step": 17300 | |
| }, | |
| { | |
| "loss": 1.2158, | |
| "grad_norm": 1.7924511432647705, | |
| "learning_rate": 0.0001479859421465261, | |
| "epoch": 0.26, | |
| "step": 17325 | |
| }, | |
| { | |
| "loss": 1.2083, | |
| "grad_norm": 2.1684257984161377, | |
| "learning_rate": 0.00014791084677540477, | |
| "epoch": 0.26, | |
| "step": 17350 | |
| }, | |
| { | |
| "loss": 1.1649, | |
| "grad_norm": 1.368639349937439, | |
| "learning_rate": 0.00014783575140428346, | |
| "epoch": 0.26, | |
| "step": 17375 | |
| }, | |
| { | |
| "loss": 1.2448, | |
| "grad_norm": 1.5606473684310913, | |
| "learning_rate": 0.0001477606560331621, | |
| "epoch": 0.26, | |
| "step": 17400 | |
| }, | |
| { | |
| "loss": 1.2516, | |
| "grad_norm": 1.3743770122528076, | |
| "learning_rate": 0.0001476855606620408, | |
| "epoch": 0.26, | |
| "step": 17425 | |
| }, | |
| { | |
| "loss": 1.1748, | |
| "grad_norm": 1.4341908693313599, | |
| "learning_rate": 0.00014761046529091946, | |
| "epoch": 0.26, | |
| "step": 17450 | |
| }, | |
| { | |
| "loss": 1.1752, | |
| "grad_norm": 2.299916982650757, | |
| "learning_rate": 0.00014753536991979815, | |
| "epoch": 0.26, | |
| "step": 17475 | |
| }, | |
| { | |
| "loss": 1.2068, | |
| "grad_norm": 2.3646254539489746, | |
| "learning_rate": 0.00014746027454867684, | |
| "epoch": 0.26, | |
| "step": 17500 | |
| }, | |
| { | |
| "loss": 1.171, | |
| "grad_norm": 2.4026846885681152, | |
| "learning_rate": 0.0001473851791775555, | |
| "epoch": 0.26, | |
| "step": 17525 | |
| }, | |
| { | |
| "loss": 1.2248, | |
| "grad_norm": 1.358500599861145, | |
| "learning_rate": 0.00014731008380643417, | |
| "epoch": 0.26, | |
| "step": 17550 | |
| }, | |
| { | |
| "loss": 1.2743, | |
| "grad_norm": 2.302159547805786, | |
| "learning_rate": 0.00014723498843531284, | |
| "epoch": 0.26, | |
| "step": 17575 | |
| }, | |
| { | |
| "loss": 1.1939, | |
| "grad_norm": 1.4632925987243652, | |
| "learning_rate": 0.00014715989306419153, | |
| "epoch": 0.26, | |
| "step": 17600 | |
| }, | |
| { | |
| "loss": 1.1962, | |
| "grad_norm": 3.442080020904541, | |
| "learning_rate": 0.0001470847976930702, | |
| "epoch": 0.26, | |
| "step": 17625 | |
| }, | |
| { | |
| "loss": 1.1649, | |
| "grad_norm": 0.879815936088562, | |
| "learning_rate": 0.0001470097023219489, | |
| "epoch": 0.27, | |
| "step": 17650 | |
| }, | |
| { | |
| "loss": 1.2207, | |
| "grad_norm": 1.877156376838684, | |
| "learning_rate": 0.00014693460695082758, | |
| "epoch": 0.27, | |
| "step": 17675 | |
| }, | |
| { | |
| "loss": 1.2056, | |
| "grad_norm": 1.6536662578582764, | |
| "learning_rate": 0.00014685951157970622, | |
| "epoch": 0.27, | |
| "step": 17700 | |
| }, | |
| { | |
| "loss": 1.1719, | |
| "grad_norm": 1.321970820426941, | |
| "learning_rate": 0.0001467844162085849, | |
| "epoch": 0.27, | |
| "step": 17725 | |
| }, | |
| { | |
| "loss": 1.2081, | |
| "grad_norm": 1.4853167533874512, | |
| "learning_rate": 0.00014670932083746358, | |
| "epoch": 0.27, | |
| "step": 17750 | |
| }, | |
| { | |
| "loss": 1.1692, | |
| "grad_norm": 1.9838991165161133, | |
| "learning_rate": 0.00014663422546634227, | |
| "epoch": 0.27, | |
| "step": 17775 | |
| }, | |
| { | |
| "loss": 1.1826, | |
| "grad_norm": 2.436300039291382, | |
| "learning_rate": 0.00014655913009522094, | |
| "epoch": 0.27, | |
| "step": 17800 | |
| }, | |
| { | |
| "loss": 1.1814, | |
| "grad_norm": 1.899038314819336, | |
| "learning_rate": 0.00014648403472409963, | |
| "epoch": 0.27, | |
| "step": 17825 | |
| }, | |
| { | |
| "loss": 1.2291, | |
| "grad_norm": 1.3306931257247925, | |
| "learning_rate": 0.00014640893935297827, | |
| "epoch": 0.27, | |
| "step": 17850 | |
| }, | |
| { | |
| "loss": 1.1888, | |
| "grad_norm": 1.6196904182434082, | |
| "learning_rate": 0.00014633384398185696, | |
| "epoch": 0.27, | |
| "step": 17875 | |
| }, | |
| { | |
| "loss": 1.2531, | |
| "grad_norm": 1.9150115251541138, | |
| "learning_rate": 0.00014625874861073565, | |
| "epoch": 0.27, | |
| "step": 17900 | |
| }, | |
| { | |
| "loss": 1.1236, | |
| "grad_norm": 1.7596296072006226, | |
| "learning_rate": 0.00014618365323961432, | |
| "epoch": 0.27, | |
| "step": 17925 | |
| }, | |
| { | |
| "loss": 1.264, | |
| "grad_norm": 2.536665678024292, | |
| "learning_rate": 0.000146108557868493, | |
| "epoch": 0.27, | |
| "step": 17950 | |
| }, | |
| { | |
| "loss": 1.2295, | |
| "grad_norm": 1.5203639268875122, | |
| "learning_rate": 0.00014603346249737167, | |
| "epoch": 0.27, | |
| "step": 17975 | |
| }, | |
| { | |
| "loss": 1.1534, | |
| "grad_norm": 1.316978931427002, | |
| "learning_rate": 0.00014595836712625034, | |
| "epoch": 0.27, | |
| "step": 18000 | |
| }, | |
| { | |
| "loss": 1.2754, | |
| "grad_norm": 1.4424588680267334, | |
| "learning_rate": 0.000145883271755129, | |
| "epoch": 0.27, | |
| "step": 18025 | |
| }, | |
| { | |
| "loss": 1.2349, | |
| "grad_norm": 2.4499781131744385, | |
| "learning_rate": 0.0001458081763840077, | |
| "epoch": 0.27, | |
| "step": 18050 | |
| }, | |
| { | |
| "loss": 1.1908, | |
| "grad_norm": 1.3816992044448853, | |
| "learning_rate": 0.00014573308101288636, | |
| "epoch": 0.27, | |
| "step": 18075 | |
| }, | |
| { | |
| "loss": 1.2685, | |
| "grad_norm": 1.1324695348739624, | |
| "learning_rate": 0.00014565798564176506, | |
| "epoch": 0.27, | |
| "step": 18100 | |
| }, | |
| { | |
| "loss": 1.1553, | |
| "grad_norm": 1.7215017080307007, | |
| "learning_rate": 0.00014558289027064375, | |
| "epoch": 0.27, | |
| "step": 18125 | |
| }, | |
| { | |
| "loss": 1.153, | |
| "grad_norm": 0.9789482355117798, | |
| "learning_rate": 0.00014550779489952239, | |
| "epoch": 0.27, | |
| "step": 18150 | |
| }, | |
| { | |
| "loss": 1.2484, | |
| "grad_norm": 3.6144516468048096, | |
| "learning_rate": 0.00014543269952840108, | |
| "epoch": 0.27, | |
| "step": 18175 | |
| }, | |
| { | |
| "loss": 1.1652, | |
| "grad_norm": 5.405023574829102, | |
| "learning_rate": 0.00014535760415727974, | |
| "epoch": 0.27, | |
| "step": 18200 | |
| }, | |
| { | |
| "loss": 1.1736, | |
| "grad_norm": 1.360303521156311, | |
| "learning_rate": 0.00014528250878615844, | |
| "epoch": 0.27, | |
| "step": 18225 | |
| }, | |
| { | |
| "loss": 1.1258, | |
| "grad_norm": 2.1543657779693604, | |
| "learning_rate": 0.0001452074134150371, | |
| "epoch": 0.27, | |
| "step": 18250 | |
| }, | |
| { | |
| "loss": 1.2295, | |
| "grad_norm": 1.6289156675338745, | |
| "learning_rate": 0.0001451323180439158, | |
| "epoch": 0.27, | |
| "step": 18275 | |
| }, | |
| { | |
| "loss": 1.1509, | |
| "grad_norm": 1.6996594667434692, | |
| "learning_rate": 0.00014505722267279446, | |
| "epoch": 0.27, | |
| "step": 18300 | |
| }, | |
| { | |
| "loss": 1.1466, | |
| "grad_norm": 1.9973461627960205, | |
| "learning_rate": 0.00014498212730167312, | |
| "epoch": 0.28, | |
| "step": 18325 | |
| }, | |
| { | |
| "loss": 1.1387, | |
| "grad_norm": 1.3268439769744873, | |
| "learning_rate": 0.00014490703193055182, | |
| "epoch": 0.28, | |
| "step": 18350 | |
| }, | |
| { | |
| "loss": 1.2239, | |
| "grad_norm": 1.3260868787765503, | |
| "learning_rate": 0.00014483193655943048, | |
| "epoch": 0.28, | |
| "step": 18375 | |
| }, | |
| { | |
| "loss": 1.2155, | |
| "grad_norm": 1.745481014251709, | |
| "learning_rate": 0.00014475684118830917, | |
| "epoch": 0.28, | |
| "step": 18400 | |
| }, | |
| { | |
| "loss": 1.1715, | |
| "grad_norm": 1.1252262592315674, | |
| "learning_rate": 0.00014468174581718784, | |
| "epoch": 0.28, | |
| "step": 18425 | |
| }, | |
| { | |
| "loss": 1.1727, | |
| "grad_norm": 2.9935803413391113, | |
| "learning_rate": 0.0001446066504460665, | |
| "epoch": 0.28, | |
| "step": 18450 | |
| }, | |
| { | |
| "loss": 1.1934, | |
| "grad_norm": 3.0998411178588867, | |
| "learning_rate": 0.00014453155507494517, | |
| "epoch": 0.28, | |
| "step": 18475 | |
| }, | |
| { | |
| "loss": 1.19, | |
| "grad_norm": 2.01745343208313, | |
| "learning_rate": 0.00014445645970382386, | |
| "epoch": 0.28, | |
| "step": 18500 | |
| }, | |
| { | |
| "loss": 1.1656, | |
| "grad_norm": 1.6752148866653442, | |
| "learning_rate": 0.00014438136433270253, | |
| "epoch": 0.28, | |
| "step": 18525 | |
| }, | |
| { | |
| "loss": 1.1701, | |
| "grad_norm": 1.126939058303833, | |
| "learning_rate": 0.00014430626896158122, | |
| "epoch": 0.28, | |
| "step": 18550 | |
| }, | |
| { | |
| "loss": 1.1228, | |
| "grad_norm": 1.5768241882324219, | |
| "learning_rate": 0.0001442311735904599, | |
| "epoch": 0.28, | |
| "step": 18575 | |
| }, | |
| { | |
| "loss": 1.1935, | |
| "grad_norm": 1.1016457080841064, | |
| "learning_rate": 0.00014415607821933855, | |
| "epoch": 0.28, | |
| "step": 18600 | |
| }, | |
| { | |
| "loss": 1.2472, | |
| "grad_norm": 2.9630792140960693, | |
| "learning_rate": 0.00014408098284821724, | |
| "epoch": 0.28, | |
| "step": 18625 | |
| }, | |
| { | |
| "loss": 1.191, | |
| "grad_norm": 1.2299975156784058, | |
| "learning_rate": 0.0001440058874770959, | |
| "epoch": 0.28, | |
| "step": 18650 | |
| }, | |
| { | |
| "loss": 1.1604, | |
| "grad_norm": 1.3096675872802734, | |
| "learning_rate": 0.0001439307921059746, | |
| "epoch": 0.28, | |
| "step": 18675 | |
| }, | |
| { | |
| "loss": 1.1423, | |
| "grad_norm": 2.186399459838867, | |
| "learning_rate": 0.00014385569673485327, | |
| "epoch": 0.28, | |
| "step": 18700 | |
| }, | |
| { | |
| "loss": 1.1783, | |
| "grad_norm": 1.5450773239135742, | |
| "learning_rate": 0.00014378060136373196, | |
| "epoch": 0.28, | |
| "step": 18725 | |
| }, | |
| { | |
| "loss": 1.2721, | |
| "grad_norm": 1.384564757347107, | |
| "learning_rate": 0.00014370550599261062, | |
| "epoch": 0.28, | |
| "step": 18750 | |
| }, | |
| { | |
| "loss": 1.2521, | |
| "grad_norm": 2.277376174926758, | |
| "learning_rate": 0.0001436304106214893, | |
| "epoch": 0.28, | |
| "step": 18775 | |
| }, | |
| { | |
| "loss": 1.2283, | |
| "grad_norm": 1.0917941331863403, | |
| "learning_rate": 0.00014355531525036798, | |
| "epoch": 0.28, | |
| "step": 18800 | |
| }, | |
| { | |
| "loss": 1.2139, | |
| "grad_norm": 2.3607280254364014, | |
| "learning_rate": 0.00014348021987924665, | |
| "epoch": 0.28, | |
| "step": 18825 | |
| }, | |
| { | |
| "loss": 1.2017, | |
| "grad_norm": 1.4834787845611572, | |
| "learning_rate": 0.00014340512450812534, | |
| "epoch": 0.28, | |
| "step": 18850 | |
| }, | |
| { | |
| "loss": 1.1556, | |
| "grad_norm": 1.913205623626709, | |
| "learning_rate": 0.000143330029137004, | |
| "epoch": 0.28, | |
| "step": 18875 | |
| }, | |
| { | |
| "loss": 1.1796, | |
| "grad_norm": 1.4506784677505493, | |
| "learning_rate": 0.00014325493376588267, | |
| "epoch": 0.28, | |
| "step": 18900 | |
| }, | |
| { | |
| "loss": 1.1792, | |
| "grad_norm": 1.0843782424926758, | |
| "learning_rate": 0.00014317983839476134, | |
| "epoch": 0.28, | |
| "step": 18925 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 1.2553937435150146, | |
| "learning_rate": 0.00014310474302364003, | |
| "epoch": 0.28, | |
| "step": 18950 | |
| }, | |
| { | |
| "loss": 1.1944, | |
| "grad_norm": 0.9680384397506714, | |
| "learning_rate": 0.0001430296476525187, | |
| "epoch": 0.28, | |
| "step": 18975 | |
| }, | |
| { | |
| "loss": 1.2441, | |
| "grad_norm": 1.4088304042816162, | |
| "learning_rate": 0.00014295455228139739, | |
| "epoch": 0.29, | |
| "step": 19000 | |
| }, | |
| { | |
| "loss": 1.1978, | |
| "grad_norm": 1.0669535398483276, | |
| "learning_rate": 0.00014287945691027608, | |
| "epoch": 0.29, | |
| "step": 19025 | |
| }, | |
| { | |
| "loss": 1.2014, | |
| "grad_norm": 1.6889104843139648, | |
| "learning_rate": 0.00014280436153915472, | |
| "epoch": 0.29, | |
| "step": 19050 | |
| }, | |
| { | |
| "loss": 1.2006, | |
| "grad_norm": 1.6797627210617065, | |
| "learning_rate": 0.0001427292661680334, | |
| "epoch": 0.29, | |
| "step": 19075 | |
| }, | |
| { | |
| "loss": 1.203, | |
| "grad_norm": 1.4236091375350952, | |
| "learning_rate": 0.00014265417079691207, | |
| "epoch": 0.29, | |
| "step": 19100 | |
| }, | |
| { | |
| "loss": 1.1643, | |
| "grad_norm": 1.0303690433502197, | |
| "learning_rate": 0.00014257907542579077, | |
| "epoch": 0.29, | |
| "step": 19125 | |
| }, | |
| { | |
| "loss": 1.1999, | |
| "grad_norm": 1.8537395000457764, | |
| "learning_rate": 0.00014250398005466943, | |
| "epoch": 0.29, | |
| "step": 19150 | |
| }, | |
| { | |
| "loss": 1.123, | |
| "grad_norm": 1.440233588218689, | |
| "learning_rate": 0.00014242888468354812, | |
| "epoch": 0.29, | |
| "step": 19175 | |
| }, | |
| { | |
| "loss": 1.1654, | |
| "grad_norm": 2.0533230304718018, | |
| "learning_rate": 0.0001423537893124268, | |
| "epoch": 0.29, | |
| "step": 19200 | |
| }, | |
| { | |
| "loss": 1.1724, | |
| "grad_norm": 1.7699745893478394, | |
| "learning_rate": 0.00014227869394130546, | |
| "epoch": 0.29, | |
| "step": 19225 | |
| }, | |
| { | |
| "loss": 1.1701, | |
| "grad_norm": 1.248593807220459, | |
| "learning_rate": 0.00014220359857018415, | |
| "epoch": 0.29, | |
| "step": 19250 | |
| }, | |
| { | |
| "loss": 1.2097, | |
| "grad_norm": 1.6481257677078247, | |
| "learning_rate": 0.0001421285031990628, | |
| "epoch": 0.29, | |
| "step": 19275 | |
| }, | |
| { | |
| "loss": 1.1776, | |
| "grad_norm": 1.5135223865509033, | |
| "learning_rate": 0.0001420534078279415, | |
| "epoch": 0.29, | |
| "step": 19300 | |
| }, | |
| { | |
| "loss": 1.166, | |
| "grad_norm": 1.790306568145752, | |
| "learning_rate": 0.00014197831245682017, | |
| "epoch": 0.29, | |
| "step": 19325 | |
| }, | |
| { | |
| "loss": 1.1318, | |
| "grad_norm": 2.1356446743011475, | |
| "learning_rate": 0.00014190321708569884, | |
| "epoch": 0.29, | |
| "step": 19350 | |
| }, | |
| { | |
| "loss": 1.178, | |
| "grad_norm": 1.4826107025146484, | |
| "learning_rate": 0.0001418281217145775, | |
| "epoch": 0.29, | |
| "step": 19375 | |
| }, | |
| { | |
| "loss": 1.1652, | |
| "grad_norm": 1.3520580530166626, | |
| "learning_rate": 0.0001417530263434562, | |
| "epoch": 0.29, | |
| "step": 19400 | |
| }, | |
| { | |
| "loss": 1.2568, | |
| "grad_norm": 1.3266022205352783, | |
| "learning_rate": 0.0001416779309723349, | |
| "epoch": 0.29, | |
| "step": 19425 | |
| }, | |
| { | |
| "loss": 1.1697, | |
| "grad_norm": 1.5133330821990967, | |
| "learning_rate": 0.00014160283560121355, | |
| "epoch": 0.29, | |
| "step": 19450 | |
| }, | |
| { | |
| "loss": 1.1317, | |
| "grad_norm": 1.729530692100525, | |
| "learning_rate": 0.00014152774023009224, | |
| "epoch": 0.29, | |
| "step": 19475 | |
| }, | |
| { | |
| "loss": 1.1676, | |
| "grad_norm": 1.2013927698135376, | |
| "learning_rate": 0.00014145264485897088, | |
| "epoch": 0.29, | |
| "step": 19500 | |
| }, | |
| { | |
| "loss": 1.2311, | |
| "grad_norm": 1.1489402055740356, | |
| "learning_rate": 0.00014137754948784957, | |
| "epoch": 0.29, | |
| "step": 19525 | |
| }, | |
| { | |
| "loss": 1.1642, | |
| "grad_norm": 1.405923843383789, | |
| "learning_rate": 0.00014130245411672824, | |
| "epoch": 0.29, | |
| "step": 19550 | |
| }, | |
| { | |
| "loss": 1.1818, | |
| "grad_norm": 1.4068244695663452, | |
| "learning_rate": 0.00014122735874560693, | |
| "epoch": 0.29, | |
| "step": 19575 | |
| }, | |
| { | |
| "loss": 1.228, | |
| "grad_norm": 1.8172351121902466, | |
| "learning_rate": 0.0001411522633744856, | |
| "epoch": 0.29, | |
| "step": 19600 | |
| }, | |
| { | |
| "loss": 1.1981, | |
| "grad_norm": 2.907489776611328, | |
| "learning_rate": 0.0001410771680033643, | |
| "epoch": 0.29, | |
| "step": 19625 | |
| }, | |
| { | |
| "loss": 1.1957, | |
| "grad_norm": 2.162321090698242, | |
| "learning_rate": 0.00014100207263224296, | |
| "epoch": 0.3, | |
| "step": 19650 | |
| }, | |
| { | |
| "loss": 1.1492, | |
| "grad_norm": 1.433248519897461, | |
| "learning_rate": 0.00014092697726112162, | |
| "epoch": 0.3, | |
| "step": 19675 | |
| }, | |
| { | |
| "loss": 1.16, | |
| "grad_norm": 1.9054490327835083, | |
| "learning_rate": 0.0001408518818900003, | |
| "epoch": 0.3, | |
| "step": 19700 | |
| }, | |
| { | |
| "loss": 1.1988, | |
| "grad_norm": 1.7673982381820679, | |
| "learning_rate": 0.00014077678651887898, | |
| "epoch": 0.3, | |
| "step": 19725 | |
| }, | |
| { | |
| "loss": 1.2049, | |
| "grad_norm": 1.3216012716293335, | |
| "learning_rate": 0.00014070169114775767, | |
| "epoch": 0.3, | |
| "step": 19750 | |
| }, | |
| { | |
| "loss": 1.1345, | |
| "grad_norm": 1.4515612125396729, | |
| "learning_rate": 0.00014062659577663634, | |
| "epoch": 0.3, | |
| "step": 19775 | |
| }, | |
| { | |
| "loss": 1.1776, | |
| "grad_norm": 1.968056559562683, | |
| "learning_rate": 0.000140551500405515, | |
| "epoch": 0.3, | |
| "step": 19800 | |
| }, | |
| { | |
| "loss": 1.2182, | |
| "grad_norm": 1.6644461154937744, | |
| "learning_rate": 0.00014047640503439367, | |
| "epoch": 0.3, | |
| "step": 19825 | |
| }, | |
| { | |
| "loss": 1.1897, | |
| "grad_norm": 2.2730207443237305, | |
| "learning_rate": 0.00014040130966327236, | |
| "epoch": 0.3, | |
| "step": 19850 | |
| }, | |
| { | |
| "loss": 1.1552, | |
| "grad_norm": 1.038794755935669, | |
| "learning_rate": 0.00014032621429215105, | |
| "epoch": 0.3, | |
| "step": 19875 | |
| }, | |
| { | |
| "loss": 1.1796, | |
| "grad_norm": 1.4719074964523315, | |
| "learning_rate": 0.00014025111892102972, | |
| "epoch": 0.3, | |
| "step": 19900 | |
| }, | |
| { | |
| "loss": 1.2031, | |
| "grad_norm": 1.8013041019439697, | |
| "learning_rate": 0.0001401760235499084, | |
| "epoch": 0.3, | |
| "step": 19925 | |
| }, | |
| { | |
| "loss": 1.1864, | |
| "grad_norm": 2.0032236576080322, | |
| "learning_rate": 0.00014010092817878705, | |
| "epoch": 0.3, | |
| "step": 19950 | |
| }, | |
| { | |
| "loss": 1.225, | |
| "grad_norm": 2.1414427757263184, | |
| "learning_rate": 0.00014002583280766574, | |
| "epoch": 0.3, | |
| "step": 19975 | |
| }, | |
| { | |
| "loss": 1.1585, | |
| "grad_norm": 4.096096515655518, | |
| "learning_rate": 0.0001399507374365444, | |
| "epoch": 0.3, | |
| "step": 20000 | |
| }, | |
| { | |
| "loss": 1.2254, | |
| "grad_norm": 1.5664288997650146, | |
| "learning_rate": 0.0001398756420654231, | |
| "epoch": 0.3, | |
| "step": 20025 | |
| }, | |
| { | |
| "loss": 1.0905, | |
| "grad_norm": 1.7429243326187134, | |
| "learning_rate": 0.00013980054669430176, | |
| "epoch": 0.3, | |
| "step": 20050 | |
| }, | |
| { | |
| "loss": 1.1744, | |
| "grad_norm": 1.551805019378662, | |
| "learning_rate": 0.00013972545132318046, | |
| "epoch": 0.3, | |
| "step": 20075 | |
| }, | |
| { | |
| "loss": 1.1998, | |
| "grad_norm": 1.483031988143921, | |
| "learning_rate": 0.00013965035595205912, | |
| "epoch": 0.3, | |
| "step": 20100 | |
| }, | |
| { | |
| "loss": 1.1391, | |
| "grad_norm": 1.2282016277313232, | |
| "learning_rate": 0.00013957526058093779, | |
| "epoch": 0.3, | |
| "step": 20125 | |
| }, | |
| { | |
| "loss": 1.0928, | |
| "grad_norm": 1.4983934164047241, | |
| "learning_rate": 0.00013950016520981648, | |
| "epoch": 0.3, | |
| "step": 20150 | |
| }, | |
| { | |
| "loss": 1.2218, | |
| "grad_norm": 1.7510052919387817, | |
| "learning_rate": 0.00013942506983869514, | |
| "epoch": 0.3, | |
| "step": 20175 | |
| }, | |
| { | |
| "loss": 1.2014, | |
| "grad_norm": 1.6214317083358765, | |
| "learning_rate": 0.00013934997446757384, | |
| "epoch": 0.3, | |
| "step": 20200 | |
| }, | |
| { | |
| "loss": 1.2299, | |
| "grad_norm": 1.8761943578720093, | |
| "learning_rate": 0.0001392748790964525, | |
| "epoch": 0.3, | |
| "step": 20225 | |
| }, | |
| { | |
| "loss": 1.2065, | |
| "grad_norm": 2.8093338012695312, | |
| "learning_rate": 0.00013919978372533117, | |
| "epoch": 0.3, | |
| "step": 20250 | |
| }, | |
| { | |
| "loss": 1.145, | |
| "grad_norm": 1.5288567543029785, | |
| "learning_rate": 0.00013912468835420983, | |
| "epoch": 0.3, | |
| "step": 20275 | |
| }, | |
| { | |
| "loss": 1.1886, | |
| "grad_norm": 1.5765314102172852, | |
| "learning_rate": 0.00013904959298308852, | |
| "epoch": 0.3, | |
| "step": 20300 | |
| }, | |
| { | |
| "loss": 1.1761, | |
| "grad_norm": 1.0417560338974, | |
| "learning_rate": 0.00013897449761196722, | |
| "epoch": 0.31, | |
| "step": 20325 | |
| }, | |
| { | |
| "loss": 1.2366, | |
| "grad_norm": 1.2328884601593018, | |
| "learning_rate": 0.00013889940224084588, | |
| "epoch": 0.31, | |
| "step": 20350 | |
| }, | |
| { | |
| "loss": 1.1157, | |
| "grad_norm": 1.6982795000076294, | |
| "learning_rate": 0.00013882430686972458, | |
| "epoch": 0.31, | |
| "step": 20375 | |
| }, | |
| { | |
| "loss": 1.2139, | |
| "grad_norm": 1.3879860639572144, | |
| "learning_rate": 0.0001387492114986032, | |
| "epoch": 0.31, | |
| "step": 20400 | |
| }, | |
| { | |
| "loss": 1.1945, | |
| "grad_norm": 1.8985368013381958, | |
| "learning_rate": 0.0001386741161274819, | |
| "epoch": 0.31, | |
| "step": 20425 | |
| }, | |
| { | |
| "loss": 1.1541, | |
| "grad_norm": 1.1783545017242432, | |
| "learning_rate": 0.00013859902075636057, | |
| "epoch": 0.31, | |
| "step": 20450 | |
| }, | |
| { | |
| "loss": 1.1777, | |
| "grad_norm": 1.639700174331665, | |
| "learning_rate": 0.00013852392538523926, | |
| "epoch": 0.31, | |
| "step": 20475 | |
| }, | |
| { | |
| "loss": 1.1743, | |
| "grad_norm": 1.1630868911743164, | |
| "learning_rate": 0.00013844883001411796, | |
| "epoch": 0.31, | |
| "step": 20500 | |
| }, | |
| { | |
| "loss": 1.231, | |
| "grad_norm": 1.5663248300552368, | |
| "learning_rate": 0.00013837373464299662, | |
| "epoch": 0.31, | |
| "step": 20525 | |
| }, | |
| { | |
| "loss": 1.2136, | |
| "grad_norm": 1.1791601181030273, | |
| "learning_rate": 0.0001382986392718753, | |
| "epoch": 0.31, | |
| "step": 20550 | |
| }, | |
| { | |
| "loss": 1.1534, | |
| "grad_norm": 1.1631137132644653, | |
| "learning_rate": 0.00013822354390075395, | |
| "epoch": 0.31, | |
| "step": 20575 | |
| }, | |
| { | |
| "loss": 1.2065, | |
| "grad_norm": 3.0869953632354736, | |
| "learning_rate": 0.00013814844852963264, | |
| "epoch": 0.31, | |
| "step": 20600 | |
| }, | |
| { | |
| "loss": 1.1659, | |
| "grad_norm": 1.5045863389968872, | |
| "learning_rate": 0.0001380733531585113, | |
| "epoch": 0.31, | |
| "step": 20625 | |
| }, | |
| { | |
| "loss": 1.2137, | |
| "grad_norm": 1.555591344833374, | |
| "learning_rate": 0.00013799825778739, | |
| "epoch": 0.31, | |
| "step": 20650 | |
| }, | |
| { | |
| "loss": 1.1867, | |
| "grad_norm": 1.1660338640213013, | |
| "learning_rate": 0.00013792316241626867, | |
| "epoch": 0.31, | |
| "step": 20675 | |
| }, | |
| { | |
| "loss": 1.1656, | |
| "grad_norm": 1.3633331060409546, | |
| "learning_rate": 0.00013784806704514733, | |
| "epoch": 0.31, | |
| "step": 20700 | |
| }, | |
| { | |
| "loss": 1.2777, | |
| "grad_norm": 1.714920163154602, | |
| "learning_rate": 0.00013777297167402603, | |
| "epoch": 0.31, | |
| "step": 20725 | |
| }, | |
| { | |
| "loss": 1.226, | |
| "grad_norm": 2.6200525760650635, | |
| "learning_rate": 0.0001376978763029047, | |
| "epoch": 0.31, | |
| "step": 20750 | |
| }, | |
| { | |
| "loss": 1.2066, | |
| "grad_norm": 1.176538109779358, | |
| "learning_rate": 0.00013762278093178338, | |
| "epoch": 0.31, | |
| "step": 20775 | |
| }, | |
| { | |
| "loss": 1.204, | |
| "grad_norm": 1.6918548345565796, | |
| "learning_rate": 0.00013754768556066205, | |
| "epoch": 0.31, | |
| "step": 20800 | |
| }, | |
| { | |
| "loss": 1.1103, | |
| "grad_norm": 1.2101995944976807, | |
| "learning_rate": 0.00013747259018954074, | |
| "epoch": 0.31, | |
| "step": 20825 | |
| }, | |
| { | |
| "loss": 1.2484, | |
| "grad_norm": 2.0804872512817383, | |
| "learning_rate": 0.00013739749481841938, | |
| "epoch": 0.31, | |
| "step": 20850 | |
| }, | |
| { | |
| "loss": 1.2183, | |
| "grad_norm": 2.115626573562622, | |
| "learning_rate": 0.00013732239944729807, | |
| "epoch": 0.31, | |
| "step": 20875 | |
| }, | |
| { | |
| "loss": 1.1542, | |
| "grad_norm": 1.6519482135772705, | |
| "learning_rate": 0.00013724730407617674, | |
| "epoch": 0.31, | |
| "step": 20900 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 2.619948625564575, | |
| "learning_rate": 0.00013717220870505543, | |
| "epoch": 0.31, | |
| "step": 20925 | |
| }, | |
| { | |
| "loss": 1.2, | |
| "grad_norm": 1.5296428203582764, | |
| "learning_rate": 0.00013709711333393412, | |
| "epoch": 0.31, | |
| "step": 20950 | |
| }, | |
| { | |
| "loss": 1.1985, | |
| "grad_norm": 2.707340717315674, | |
| "learning_rate": 0.0001370220179628128, | |
| "epoch": 0.32, | |
| "step": 20975 | |
| }, | |
| { | |
| "loss": 1.179, | |
| "grad_norm": 1.8074674606323242, | |
| "learning_rate": 0.00013694692259169145, | |
| "epoch": 0.32, | |
| "step": 21000 | |
| }, | |
| { | |
| "loss": 1.1687, | |
| "grad_norm": 1.1176238059997559, | |
| "learning_rate": 0.00013687182722057012, | |
| "epoch": 0.32, | |
| "step": 21025 | |
| }, | |
| { | |
| "loss": 1.2645, | |
| "grad_norm": 2.0191187858581543, | |
| "learning_rate": 0.0001367967318494488, | |
| "epoch": 0.32, | |
| "step": 21050 | |
| }, | |
| { | |
| "loss": 1.2807, | |
| "grad_norm": 1.368486762046814, | |
| "learning_rate": 0.00013672163647832747, | |
| "epoch": 0.32, | |
| "step": 21075 | |
| }, | |
| { | |
| "loss": 1.1935, | |
| "grad_norm": 1.582977294921875, | |
| "learning_rate": 0.00013664654110720617, | |
| "epoch": 0.32, | |
| "step": 21100 | |
| }, | |
| { | |
| "loss": 1.2249, | |
| "grad_norm": 1.6462111473083496, | |
| "learning_rate": 0.00013657144573608483, | |
| "epoch": 0.32, | |
| "step": 21125 | |
| }, | |
| { | |
| "loss": 1.1481, | |
| "grad_norm": 2.2449021339416504, | |
| "learning_rate": 0.0001364963503649635, | |
| "epoch": 0.32, | |
| "step": 21150 | |
| }, | |
| { | |
| "loss": 1.2925, | |
| "grad_norm": 1.45096755027771, | |
| "learning_rate": 0.0001364212549938422, | |
| "epoch": 0.32, | |
| "step": 21175 | |
| }, | |
| { | |
| "loss": 1.2251, | |
| "grad_norm": 1.5417848825454712, | |
| "learning_rate": 0.00013634615962272086, | |
| "epoch": 0.32, | |
| "step": 21200 | |
| }, | |
| { | |
| "loss": 1.1838, | |
| "grad_norm": 1.4828438758850098, | |
| "learning_rate": 0.00013627106425159955, | |
| "epoch": 0.32, | |
| "step": 21225 | |
| }, | |
| { | |
| "loss": 1.2017, | |
| "grad_norm": 1.9270501136779785, | |
| "learning_rate": 0.0001361959688804782, | |
| "epoch": 0.32, | |
| "step": 21250 | |
| }, | |
| { | |
| "loss": 1.167, | |
| "grad_norm": 1.438550353050232, | |
| "learning_rate": 0.0001361208735093569, | |
| "epoch": 0.32, | |
| "step": 21275 | |
| }, | |
| { | |
| "loss": 1.1553, | |
| "grad_norm": 1.860770344734192, | |
| "learning_rate": 0.00013604577813823557, | |
| "epoch": 0.32, | |
| "step": 21300 | |
| }, | |
| { | |
| "loss": 1.1355, | |
| "grad_norm": 2.12158203125, | |
| "learning_rate": 0.00013597068276711424, | |
| "epoch": 0.32, | |
| "step": 21325 | |
| }, | |
| { | |
| "loss": 1.1958, | |
| "grad_norm": 1.2415894269943237, | |
| "learning_rate": 0.0001358955873959929, | |
| "epoch": 0.32, | |
| "step": 21350 | |
| }, | |
| { | |
| "loss": 1.0986, | |
| "grad_norm": 2.1204869747161865, | |
| "learning_rate": 0.0001358204920248716, | |
| "epoch": 0.32, | |
| "step": 21375 | |
| }, | |
| { | |
| "loss": 1.1916, | |
| "grad_norm": 2.0683250427246094, | |
| "learning_rate": 0.0001357453966537503, | |
| "epoch": 0.32, | |
| "step": 21400 | |
| }, | |
| { | |
| "loss": 1.2799, | |
| "grad_norm": 1.136094331741333, | |
| "learning_rate": 0.00013567030128262895, | |
| "epoch": 0.32, | |
| "step": 21425 | |
| }, | |
| { | |
| "loss": 1.1714, | |
| "grad_norm": 2.614771842956543, | |
| "learning_rate": 0.00013559520591150762, | |
| "epoch": 0.32, | |
| "step": 21450 | |
| }, | |
| { | |
| "loss": 1.1808, | |
| "grad_norm": 1.1263775825500488, | |
| "learning_rate": 0.00013552011054038628, | |
| "epoch": 0.32, | |
| "step": 21475 | |
| }, | |
| { | |
| "loss": 1.1903, | |
| "grad_norm": 1.8330289125442505, | |
| "learning_rate": 0.00013544501516926498, | |
| "epoch": 0.32, | |
| "step": 21500 | |
| }, | |
| { | |
| "loss": 1.1863, | |
| "grad_norm": 2.0172111988067627, | |
| "learning_rate": 0.00013536991979814364, | |
| "epoch": 0.32, | |
| "step": 21525 | |
| }, | |
| { | |
| "loss": 1.2356, | |
| "grad_norm": 1.7615008354187012, | |
| "learning_rate": 0.00013529482442702233, | |
| "epoch": 0.32, | |
| "step": 21550 | |
| }, | |
| { | |
| "loss": 1.2365, | |
| "grad_norm": 3.3480842113494873, | |
| "learning_rate": 0.000135219729055901, | |
| "epoch": 0.32, | |
| "step": 21575 | |
| }, | |
| { | |
| "loss": 1.0925, | |
| "grad_norm": 1.5129296779632568, | |
| "learning_rate": 0.00013514463368477966, | |
| "epoch": 0.32, | |
| "step": 21600 | |
| }, | |
| { | |
| "loss": 1.1838, | |
| "grad_norm": 1.1446235179901123, | |
| "learning_rate": 0.00013506953831365836, | |
| "epoch": 0.32, | |
| "step": 21625 | |
| }, | |
| { | |
| "loss": 1.2593, | |
| "grad_norm": 1.2927684783935547, | |
| "learning_rate": 0.00013499444294253702, | |
| "epoch": 0.33, | |
| "step": 21650 | |
| }, | |
| { | |
| "loss": 1.1879, | |
| "grad_norm": 1.6593775749206543, | |
| "learning_rate": 0.00013491934757141571, | |
| "epoch": 0.33, | |
| "step": 21675 | |
| }, | |
| { | |
| "loss": 1.169, | |
| "grad_norm": 1.3151673078536987, | |
| "learning_rate": 0.00013484425220029438, | |
| "epoch": 0.33, | |
| "step": 21700 | |
| }, | |
| { | |
| "loss": 1.159, | |
| "grad_norm": 1.4625322818756104, | |
| "learning_rate": 0.00013476915682917307, | |
| "epoch": 0.33, | |
| "step": 21725 | |
| }, | |
| { | |
| "loss": 1.1482, | |
| "grad_norm": 1.4630295038223267, | |
| "learning_rate": 0.00013469406145805174, | |
| "epoch": 0.33, | |
| "step": 21750 | |
| }, | |
| { | |
| "loss": 1.1558, | |
| "grad_norm": 1.914694905281067, | |
| "learning_rate": 0.0001346189660869304, | |
| "epoch": 0.33, | |
| "step": 21775 | |
| }, | |
| { | |
| "loss": 1.1895, | |
| "grad_norm": 1.1685444116592407, | |
| "learning_rate": 0.0001345438707158091, | |
| "epoch": 0.33, | |
| "step": 21800 | |
| }, | |
| { | |
| "loss": 1.137, | |
| "grad_norm": 1.8522282838821411, | |
| "learning_rate": 0.00013446877534468776, | |
| "epoch": 0.33, | |
| "step": 21825 | |
| }, | |
| { | |
| "loss": 1.2181, | |
| "grad_norm": 2.1433138847351074, | |
| "learning_rate": 0.00013439367997356645, | |
| "epoch": 0.33, | |
| "step": 21850 | |
| }, | |
| { | |
| "loss": 1.1694, | |
| "grad_norm": 1.1134564876556396, | |
| "learning_rate": 0.00013431858460244512, | |
| "epoch": 0.33, | |
| "step": 21875 | |
| }, | |
| { | |
| "loss": 1.1859, | |
| "grad_norm": 0.9985026121139526, | |
| "learning_rate": 0.00013424348923132378, | |
| "epoch": 0.33, | |
| "step": 21900 | |
| }, | |
| { | |
| "loss": 1.1866, | |
| "grad_norm": 1.732964038848877, | |
| "learning_rate": 0.00013416839386020245, | |
| "epoch": 0.33, | |
| "step": 21925 | |
| }, | |
| { | |
| "loss": 1.1623, | |
| "grad_norm": 1.2273517847061157, | |
| "learning_rate": 0.00013409329848908114, | |
| "epoch": 0.33, | |
| "step": 21950 | |
| }, | |
| { | |
| "loss": 1.1336, | |
| "grad_norm": 1.2174320220947266, | |
| "learning_rate": 0.0001340182031179598, | |
| "epoch": 0.33, | |
| "step": 21975 | |
| }, | |
| { | |
| "loss": 1.1903, | |
| "grad_norm": 2.137214422225952, | |
| "learning_rate": 0.0001339431077468385, | |
| "epoch": 0.33, | |
| "step": 22000 | |
| }, | |
| { | |
| "loss": 1.214, | |
| "grad_norm": 1.2529860734939575, | |
| "learning_rate": 0.0001338680123757172, | |
| "epoch": 0.33, | |
| "step": 22025 | |
| }, | |
| { | |
| "loss": 1.1389, | |
| "grad_norm": 1.8254303932189941, | |
| "learning_rate": 0.00013379291700459583, | |
| "epoch": 0.33, | |
| "step": 22050 | |
| }, | |
| { | |
| "loss": 1.1559, | |
| "grad_norm": 2.0765380859375, | |
| "learning_rate": 0.00013371782163347452, | |
| "epoch": 0.33, | |
| "step": 22075 | |
| }, | |
| { | |
| "loss": 1.1536, | |
| "grad_norm": 1.504064917564392, | |
| "learning_rate": 0.0001336427262623532, | |
| "epoch": 0.33, | |
| "step": 22100 | |
| }, | |
| { | |
| "loss": 1.2144, | |
| "grad_norm": 2.490610122680664, | |
| "learning_rate": 0.00013356763089123188, | |
| "epoch": 0.33, | |
| "step": 22125 | |
| }, | |
| { | |
| "loss": 1.1543, | |
| "grad_norm": 1.6488279104232788, | |
| "learning_rate": 0.00013349253552011054, | |
| "epoch": 0.33, | |
| "step": 22150 | |
| }, | |
| { | |
| "loss": 1.1756, | |
| "grad_norm": 2.970743417739868, | |
| "learning_rate": 0.00013341744014898924, | |
| "epoch": 0.33, | |
| "step": 22175 | |
| }, | |
| { | |
| "loss": 1.2094, | |
| "grad_norm": 1.299083948135376, | |
| "learning_rate": 0.0001333423447778679, | |
| "epoch": 0.33, | |
| "step": 22200 | |
| }, | |
| { | |
| "loss": 1.0779, | |
| "grad_norm": 1.3857295513153076, | |
| "learning_rate": 0.00013326724940674657, | |
| "epoch": 0.33, | |
| "step": 22225 | |
| }, | |
| { | |
| "loss": 1.2177, | |
| "grad_norm": 1.7416950464248657, | |
| "learning_rate": 0.00013319215403562526, | |
| "epoch": 0.33, | |
| "step": 22250 | |
| }, | |
| { | |
| "loss": 1.179, | |
| "grad_norm": 2.380249261856079, | |
| "learning_rate": 0.00013311705866450393, | |
| "epoch": 0.33, | |
| "step": 22275 | |
| }, | |
| { | |
| "loss": 1.2057, | |
| "grad_norm": 1.3791347742080688, | |
| "learning_rate": 0.00013304196329338262, | |
| "epoch": 0.33, | |
| "step": 22300 | |
| }, | |
| { | |
| "loss": 1.1725, | |
| "grad_norm": 1.0284641981124878, | |
| "learning_rate": 0.00013296686792226128, | |
| "epoch": 0.34, | |
| "step": 22325 | |
| }, | |
| { | |
| "loss": 1.1518, | |
| "grad_norm": 2.1696279048919678, | |
| "learning_rate": 0.00013289177255113995, | |
| "epoch": 0.34, | |
| "step": 22350 | |
| }, | |
| { | |
| "loss": 1.2832, | |
| "grad_norm": 1.2163208723068237, | |
| "learning_rate": 0.0001328166771800186, | |
| "epoch": 0.34, | |
| "step": 22375 | |
| }, | |
| { | |
| "loss": 1.1366, | |
| "grad_norm": 1.724770426750183, | |
| "learning_rate": 0.0001327415818088973, | |
| "epoch": 0.34, | |
| "step": 22400 | |
| }, | |
| { | |
| "loss": 1.2067, | |
| "grad_norm": 1.9105318784713745, | |
| "learning_rate": 0.00013266648643777597, | |
| "epoch": 0.34, | |
| "step": 22425 | |
| }, | |
| { | |
| "loss": 1.1917, | |
| "grad_norm": 1.1520806550979614, | |
| "learning_rate": 0.00013259139106665466, | |
| "epoch": 0.34, | |
| "step": 22450 | |
| }, | |
| { | |
| "loss": 1.1637, | |
| "grad_norm": 1.8389378786087036, | |
| "learning_rate": 0.00013251629569553336, | |
| "epoch": 0.34, | |
| "step": 22475 | |
| }, | |
| { | |
| "loss": 1.2151, | |
| "grad_norm": 4.63606595993042, | |
| "learning_rate": 0.000132441200324412, | |
| "epoch": 0.34, | |
| "step": 22500 | |
| }, | |
| { | |
| "loss": 1.196, | |
| "grad_norm": 2.179290771484375, | |
| "learning_rate": 0.0001323661049532907, | |
| "epoch": 0.34, | |
| "step": 22525 | |
| }, | |
| { | |
| "loss": 1.158, | |
| "grad_norm": 1.1105175018310547, | |
| "learning_rate": 0.00013229100958216935, | |
| "epoch": 0.34, | |
| "step": 22550 | |
| }, | |
| { | |
| "loss": 1.1638, | |
| "grad_norm": 1.1015607118606567, | |
| "learning_rate": 0.00013221591421104804, | |
| "epoch": 0.34, | |
| "step": 22575 | |
| }, | |
| { | |
| "loss": 1.1948, | |
| "grad_norm": 1.314866304397583, | |
| "learning_rate": 0.0001321408188399267, | |
| "epoch": 0.34, | |
| "step": 22600 | |
| }, | |
| { | |
| "loss": 1.1234, | |
| "grad_norm": 1.3410804271697998, | |
| "learning_rate": 0.0001320657234688054, | |
| "epoch": 0.34, | |
| "step": 22625 | |
| }, | |
| { | |
| "loss": 1.2106, | |
| "grad_norm": 1.4340014457702637, | |
| "learning_rate": 0.00013199062809768407, | |
| "epoch": 0.34, | |
| "step": 22650 | |
| }, | |
| { | |
| "loss": 1.2023, | |
| "grad_norm": 2.40155291557312, | |
| "learning_rate": 0.00013191553272656273, | |
| "epoch": 0.34, | |
| "step": 22675 | |
| }, | |
| { | |
| "loss": 1.1545, | |
| "grad_norm": 1.752961277961731, | |
| "learning_rate": 0.00013184043735544143, | |
| "epoch": 0.34, | |
| "step": 22700 | |
| }, | |
| { | |
| "loss": 1.2083, | |
| "grad_norm": 2.0551249980926514, | |
| "learning_rate": 0.0001317653419843201, | |
| "epoch": 0.34, | |
| "step": 22725 | |
| }, | |
| { | |
| "loss": 1.2815, | |
| "grad_norm": 2.0029456615448, | |
| "learning_rate": 0.00013169024661319878, | |
| "epoch": 0.34, | |
| "step": 22750 | |
| }, | |
| { | |
| "loss": 1.1618, | |
| "grad_norm": 1.6569886207580566, | |
| "learning_rate": 0.00013161515124207745, | |
| "epoch": 0.34, | |
| "step": 22775 | |
| }, | |
| { | |
| "loss": 1.1506, | |
| "grad_norm": 1.0627089738845825, | |
| "learning_rate": 0.00013154005587095611, | |
| "epoch": 0.34, | |
| "step": 22800 | |
| }, | |
| { | |
| "loss": 1.178, | |
| "grad_norm": 1.4119595289230347, | |
| "learning_rate": 0.00013146496049983478, | |
| "epoch": 0.34, | |
| "step": 22825 | |
| }, | |
| { | |
| "loss": 1.2293, | |
| "grad_norm": 2.070948839187622, | |
| "learning_rate": 0.00013138986512871347, | |
| "epoch": 0.34, | |
| "step": 22850 | |
| }, | |
| { | |
| "loss": 1.2195, | |
| "grad_norm": 3.2543933391571045, | |
| "learning_rate": 0.00013131476975759214, | |
| "epoch": 0.34, | |
| "step": 22875 | |
| }, | |
| { | |
| "loss": 1.196, | |
| "grad_norm": 2.154444694519043, | |
| "learning_rate": 0.00013123967438647083, | |
| "epoch": 0.34, | |
| "step": 22900 | |
| }, | |
| { | |
| "loss": 1.1807, | |
| "grad_norm": 1.9498579502105713, | |
| "learning_rate": 0.00013116457901534952, | |
| "epoch": 0.34, | |
| "step": 22925 | |
| }, | |
| { | |
| "loss": 1.1659, | |
| "grad_norm": 1.2425457239151, | |
| "learning_rate": 0.00013108948364422816, | |
| "epoch": 0.34, | |
| "step": 22950 | |
| }, | |
| { | |
| "loss": 1.181, | |
| "grad_norm": 1.0989060401916504, | |
| "learning_rate": 0.00013101438827310685, | |
| "epoch": 0.35, | |
| "step": 22975 | |
| }, | |
| { | |
| "loss": 1.1095, | |
| "grad_norm": 1.509493350982666, | |
| "learning_rate": 0.00013093929290198552, | |
| "epoch": 0.35, | |
| "step": 23000 | |
| }, | |
| { | |
| "loss": 1.1686, | |
| "grad_norm": 1.762772798538208, | |
| "learning_rate": 0.0001308641975308642, | |
| "epoch": 0.35, | |
| "step": 23025 | |
| }, | |
| { | |
| "loss": 1.1062, | |
| "grad_norm": 2.1119191646575928, | |
| "learning_rate": 0.00013078910215974288, | |
| "epoch": 0.35, | |
| "step": 23050 | |
| }, | |
| { | |
| "loss": 1.1906, | |
| "grad_norm": 1.1782546043395996, | |
| "learning_rate": 0.00013071400678862157, | |
| "epoch": 0.35, | |
| "step": 23075 | |
| }, | |
| { | |
| "loss": 1.2047, | |
| "grad_norm": 1.2365734577178955, | |
| "learning_rate": 0.00013063891141750023, | |
| "epoch": 0.35, | |
| "step": 23100 | |
| }, | |
| { | |
| "loss": 1.1824, | |
| "grad_norm": 1.0874519348144531, | |
| "learning_rate": 0.0001305638160463789, | |
| "epoch": 0.35, | |
| "step": 23125 | |
| }, | |
| { | |
| "loss": 1.127, | |
| "grad_norm": 1.9339088201522827, | |
| "learning_rate": 0.0001304887206752576, | |
| "epoch": 0.35, | |
| "step": 23150 | |
| }, | |
| { | |
| "loss": 1.1529, | |
| "grad_norm": 2.087249517440796, | |
| "learning_rate": 0.00013041362530413626, | |
| "epoch": 0.35, | |
| "step": 23175 | |
| }, | |
| { | |
| "loss": 1.1736, | |
| "grad_norm": 1.0799955129623413, | |
| "learning_rate": 0.00013033852993301495, | |
| "epoch": 0.35, | |
| "step": 23200 | |
| }, | |
| { | |
| "loss": 1.1652, | |
| "grad_norm": 4.290017127990723, | |
| "learning_rate": 0.00013026343456189361, | |
| "epoch": 0.35, | |
| "step": 23225 | |
| }, | |
| { | |
| "loss": 1.1845, | |
| "grad_norm": 1.8332254886627197, | |
| "learning_rate": 0.00013018833919077228, | |
| "epoch": 0.35, | |
| "step": 23250 | |
| }, | |
| { | |
| "loss": 1.227, | |
| "grad_norm": 2.3208718299865723, | |
| "learning_rate": 0.00013011324381965094, | |
| "epoch": 0.35, | |
| "step": 23275 | |
| }, | |
| { | |
| "loss": 1.0917, | |
| "grad_norm": 1.9536670446395874, | |
| "learning_rate": 0.00013003814844852964, | |
| "epoch": 0.35, | |
| "step": 23300 | |
| }, | |
| { | |
| "loss": 1.1812, | |
| "grad_norm": 1.225029468536377, | |
| "learning_rate": 0.00012996305307740833, | |
| "epoch": 0.35, | |
| "step": 23325 | |
| }, | |
| { | |
| "loss": 1.2249, | |
| "grad_norm": 2.538161039352417, | |
| "learning_rate": 0.000129887957706287, | |
| "epoch": 0.35, | |
| "step": 23350 | |
| }, | |
| { | |
| "loss": 1.1578, | |
| "grad_norm": 1.2378344535827637, | |
| "learning_rate": 0.0001298128623351657, | |
| "epoch": 0.35, | |
| "step": 23375 | |
| }, | |
| { | |
| "loss": 1.1544, | |
| "grad_norm": 3.9860634803771973, | |
| "learning_rate": 0.00012973776696404433, | |
| "epoch": 0.35, | |
| "step": 23400 | |
| }, | |
| { | |
| "loss": 1.1704, | |
| "grad_norm": 1.1592284440994263, | |
| "learning_rate": 0.00012966267159292302, | |
| "epoch": 0.35, | |
| "step": 23425 | |
| }, | |
| { | |
| "loss": 1.2261, | |
| "grad_norm": 0.9641034603118896, | |
| "learning_rate": 0.00012958757622180168, | |
| "epoch": 0.35, | |
| "step": 23450 | |
| }, | |
| { | |
| "loss": 1.1879, | |
| "grad_norm": 2.3419320583343506, | |
| "learning_rate": 0.00012951248085068038, | |
| "epoch": 0.35, | |
| "step": 23475 | |
| }, | |
| { | |
| "loss": 1.1237, | |
| "grad_norm": 1.641772747039795, | |
| "learning_rate": 0.00012943738547955904, | |
| "epoch": 0.35, | |
| "step": 23500 | |
| }, | |
| { | |
| "loss": 1.1636, | |
| "grad_norm": 1.8921740055084229, | |
| "learning_rate": 0.00012936229010843773, | |
| "epoch": 0.35, | |
| "step": 23525 | |
| }, | |
| { | |
| "loss": 1.1919, | |
| "grad_norm": 1.5332955121994019, | |
| "learning_rate": 0.0001292871947373164, | |
| "epoch": 0.35, | |
| "step": 23550 | |
| }, | |
| { | |
| "loss": 1.1632, | |
| "grad_norm": 1.6443663835525513, | |
| "learning_rate": 0.00012921209936619506, | |
| "epoch": 0.35, | |
| "step": 23575 | |
| }, | |
| { | |
| "loss": 1.1963, | |
| "grad_norm": 2.044127941131592, | |
| "learning_rate": 0.00012913700399507376, | |
| "epoch": 0.35, | |
| "step": 23600 | |
| }, | |
| { | |
| "loss": 1.1971, | |
| "grad_norm": 2.1552951335906982, | |
| "learning_rate": 0.00012906190862395242, | |
| "epoch": 0.35, | |
| "step": 23625 | |
| }, | |
| { | |
| "loss": 1.221, | |
| "grad_norm": 1.7061282396316528, | |
| "learning_rate": 0.00012898681325283111, | |
| "epoch": 0.36, | |
| "step": 23650 | |
| }, | |
| { | |
| "loss": 1.1243, | |
| "grad_norm": 1.581986904144287, | |
| "learning_rate": 0.00012891171788170978, | |
| "epoch": 0.36, | |
| "step": 23675 | |
| }, | |
| { | |
| "loss": 1.2158, | |
| "grad_norm": 1.999489665031433, | |
| "learning_rate": 0.00012883662251058844, | |
| "epoch": 0.36, | |
| "step": 23700 | |
| }, | |
| { | |
| "loss": 1.1868, | |
| "grad_norm": 1.5865546464920044, | |
| "learning_rate": 0.0001287615271394671, | |
| "epoch": 0.36, | |
| "step": 23725 | |
| }, | |
| { | |
| "loss": 1.1772, | |
| "grad_norm": 1.1765635013580322, | |
| "learning_rate": 0.0001286864317683458, | |
| "epoch": 0.36, | |
| "step": 23750 | |
| }, | |
| { | |
| "loss": 1.1669, | |
| "grad_norm": 2.248819589614868, | |
| "learning_rate": 0.0001286113363972245, | |
| "epoch": 0.36, | |
| "step": 23775 | |
| }, | |
| { | |
| "loss": 1.1574, | |
| "grad_norm": 1.4647800922393799, | |
| "learning_rate": 0.00012853624102610316, | |
| "epoch": 0.36, | |
| "step": 23800 | |
| }, | |
| { | |
| "loss": 1.1986, | |
| "grad_norm": 1.1818993091583252, | |
| "learning_rate": 0.00012846114565498185, | |
| "epoch": 0.36, | |
| "step": 23825 | |
| }, | |
| { | |
| "loss": 1.1631, | |
| "grad_norm": 1.785582423210144, | |
| "learning_rate": 0.0001283860502838605, | |
| "epoch": 0.36, | |
| "step": 23850 | |
| }, | |
| { | |
| "loss": 1.2067, | |
| "grad_norm": 1.7691236734390259, | |
| "learning_rate": 0.00012831095491273918, | |
| "epoch": 0.36, | |
| "step": 23875 | |
| }, | |
| { | |
| "loss": 1.0843, | |
| "grad_norm": 1.4879204034805298, | |
| "learning_rate": 0.00012823585954161785, | |
| "epoch": 0.36, | |
| "step": 23900 | |
| }, | |
| { | |
| "loss": 1.1911, | |
| "grad_norm": 1.4341880083084106, | |
| "learning_rate": 0.00012816076417049654, | |
| "epoch": 0.36, | |
| "step": 23925 | |
| }, | |
| { | |
| "loss": 1.162, | |
| "grad_norm": 0.8942863345146179, | |
| "learning_rate": 0.0001280856687993752, | |
| "epoch": 0.36, | |
| "step": 23950 | |
| }, | |
| { | |
| "loss": 1.2, | |
| "grad_norm": 1.329323172569275, | |
| "learning_rate": 0.0001280105734282539, | |
| "epoch": 0.36, | |
| "step": 23975 | |
| }, | |
| { | |
| "loss": 1.1484, | |
| "grad_norm": 1.621002197265625, | |
| "learning_rate": 0.00012793547805713256, | |
| "epoch": 0.36, | |
| "step": 24000 | |
| }, | |
| { | |
| "loss": 1.181, | |
| "grad_norm": 1.8257761001586914, | |
| "learning_rate": 0.00012786038268601123, | |
| "epoch": 0.36, | |
| "step": 24025 | |
| }, | |
| { | |
| "loss": 1.1984, | |
| "grad_norm": 2.572247266769409, | |
| "learning_rate": 0.00012778528731488992, | |
| "epoch": 0.36, | |
| "step": 24050 | |
| }, | |
| { | |
| "loss": 1.1867, | |
| "grad_norm": 1.7765648365020752, | |
| "learning_rate": 0.0001277101919437686, | |
| "epoch": 0.36, | |
| "step": 24075 | |
| }, | |
| { | |
| "loss": 1.1984, | |
| "grad_norm": 1.3976967334747314, | |
| "learning_rate": 0.00012763509657264728, | |
| "epoch": 0.36, | |
| "step": 24100 | |
| }, | |
| { | |
| "loss": 1.1685, | |
| "grad_norm": 1.6491625308990479, | |
| "learning_rate": 0.00012756000120152594, | |
| "epoch": 0.36, | |
| "step": 24125 | |
| }, | |
| { | |
| "loss": 1.1497, | |
| "grad_norm": 1.698404312133789, | |
| "learning_rate": 0.0001274849058304046, | |
| "epoch": 0.36, | |
| "step": 24150 | |
| }, | |
| { | |
| "loss": 1.2076, | |
| "grad_norm": 1.2471705675125122, | |
| "learning_rate": 0.00012740981045928328, | |
| "epoch": 0.36, | |
| "step": 24175 | |
| }, | |
| { | |
| "loss": 1.1596, | |
| "grad_norm": 1.2114017009735107, | |
| "learning_rate": 0.00012733471508816197, | |
| "epoch": 0.36, | |
| "step": 24200 | |
| }, | |
| { | |
| "loss": 1.2032, | |
| "grad_norm": 1.1424446105957031, | |
| "learning_rate": 0.00012725961971704066, | |
| "epoch": 0.36, | |
| "step": 24225 | |
| }, | |
| { | |
| "loss": 1.1548, | |
| "grad_norm": 1.3526264429092407, | |
| "learning_rate": 0.00012718452434591933, | |
| "epoch": 0.36, | |
| "step": 24250 | |
| }, | |
| { | |
| "loss": 1.2415, | |
| "grad_norm": 1.2714468240737915, | |
| "learning_rate": 0.00012710942897479802, | |
| "epoch": 0.36, | |
| "step": 24275 | |
| }, | |
| { | |
| "loss": 1.1264, | |
| "grad_norm": 2.064203977584839, | |
| "learning_rate": 0.00012703433360367668, | |
| "epoch": 0.36, | |
| "step": 24300 | |
| }, | |
| { | |
| "loss": 1.1578, | |
| "grad_norm": 1.4952439069747925, | |
| "learning_rate": 0.00012695923823255535, | |
| "epoch": 0.37, | |
| "step": 24325 | |
| }, | |
| { | |
| "loss": 1.1495, | |
| "grad_norm": 1.4773337841033936, | |
| "learning_rate": 0.00012688414286143401, | |
| "epoch": 0.37, | |
| "step": 24350 | |
| }, | |
| { | |
| "loss": 1.1591, | |
| "grad_norm": 1.1870368719100952, | |
| "learning_rate": 0.0001268090474903127, | |
| "epoch": 0.37, | |
| "step": 24375 | |
| }, | |
| { | |
| "loss": 1.1744, | |
| "grad_norm": 1.824880838394165, | |
| "learning_rate": 0.00012673395211919137, | |
| "epoch": 0.37, | |
| "step": 24400 | |
| }, | |
| { | |
| "loss": 1.198, | |
| "grad_norm": 1.18766188621521, | |
| "learning_rate": 0.00012665885674807006, | |
| "epoch": 0.37, | |
| "step": 24425 | |
| }, | |
| { | |
| "loss": 1.2227, | |
| "grad_norm": 1.719905138015747, | |
| "learning_rate": 0.00012658376137694873, | |
| "epoch": 0.37, | |
| "step": 24450 | |
| }, | |
| { | |
| "loss": 1.294, | |
| "grad_norm": 1.9146957397460938, | |
| "learning_rate": 0.0001265086660058274, | |
| "epoch": 0.37, | |
| "step": 24475 | |
| }, | |
| { | |
| "loss": 1.2087, | |
| "grad_norm": 2.0763649940490723, | |
| "learning_rate": 0.0001264335706347061, | |
| "epoch": 0.37, | |
| "step": 24500 | |
| }, | |
| { | |
| "loss": 1.1887, | |
| "grad_norm": 2.3640265464782715, | |
| "learning_rate": 0.00012635847526358475, | |
| "epoch": 0.37, | |
| "step": 24525 | |
| }, | |
| { | |
| "loss": 1.2053, | |
| "grad_norm": 1.9339317083358765, | |
| "learning_rate": 0.00012628337989246344, | |
| "epoch": 0.37, | |
| "step": 24550 | |
| }, | |
| { | |
| "loss": 1.1123, | |
| "grad_norm": 1.4369031190872192, | |
| "learning_rate": 0.0001262082845213421, | |
| "epoch": 0.37, | |
| "step": 24575 | |
| }, | |
| { | |
| "loss": 1.1309, | |
| "grad_norm": 1.2952880859375, | |
| "learning_rate": 0.00012613318915022078, | |
| "epoch": 0.37, | |
| "step": 24600 | |
| }, | |
| { | |
| "loss": 1.1589, | |
| "grad_norm": 2.8487777709960938, | |
| "learning_rate": 0.00012605809377909947, | |
| "epoch": 0.37, | |
| "step": 24625 | |
| }, | |
| { | |
| "loss": 1.1054, | |
| "grad_norm": 1.1736781597137451, | |
| "learning_rate": 0.00012598299840797813, | |
| "epoch": 0.37, | |
| "step": 24650 | |
| }, | |
| { | |
| "loss": 1.1255, | |
| "grad_norm": 1.5358980894088745, | |
| "learning_rate": 0.00012590790303685683, | |
| "epoch": 0.37, | |
| "step": 24675 | |
| }, | |
| { | |
| "loss": 1.2041, | |
| "grad_norm": 2.0065975189208984, | |
| "learning_rate": 0.0001258328076657355, | |
| "epoch": 0.37, | |
| "step": 24700 | |
| }, | |
| { | |
| "loss": 1.15, | |
| "grad_norm": 1.2211554050445557, | |
| "learning_rate": 0.00012575771229461418, | |
| "epoch": 0.37, | |
| "step": 24725 | |
| }, | |
| { | |
| "loss": 1.1834, | |
| "grad_norm": 1.3376033306121826, | |
| "learning_rate": 0.00012568261692349285, | |
| "epoch": 0.37, | |
| "step": 24750 | |
| }, | |
| { | |
| "loss": 1.2355, | |
| "grad_norm": 2.8535170555114746, | |
| "learning_rate": 0.00012560752155237151, | |
| "epoch": 0.37, | |
| "step": 24775 | |
| }, | |
| { | |
| "loss": 1.1949, | |
| "grad_norm": 1.9856910705566406, | |
| "learning_rate": 0.00012553242618125018, | |
| "epoch": 0.37, | |
| "step": 24800 | |
| }, | |
| { | |
| "loss": 1.1887, | |
| "grad_norm": 2.9144210815429688, | |
| "learning_rate": 0.00012545733081012887, | |
| "epoch": 0.37, | |
| "step": 24825 | |
| }, | |
| { | |
| "loss": 1.1893, | |
| "grad_norm": 1.4913091659545898, | |
| "learning_rate": 0.00012538223543900756, | |
| "epoch": 0.37, | |
| "step": 24850 | |
| }, | |
| { | |
| "loss": 1.1173, | |
| "grad_norm": 1.685804009437561, | |
| "learning_rate": 0.00012530714006788623, | |
| "epoch": 0.37, | |
| "step": 24875 | |
| }, | |
| { | |
| "loss": 1.1303, | |
| "grad_norm": 1.3694686889648438, | |
| "learning_rate": 0.00012523504851160973, | |
| "epoch": 0.37, | |
| "step": 24900 | |
| }, | |
| { | |
| "loss": 1.2075, | |
| "grad_norm": 1.3392975330352783, | |
| "learning_rate": 0.00012515995314048842, | |
| "epoch": 0.37, | |
| "step": 24925 | |
| }, | |
| { | |
| "loss": 1.1981, | |
| "grad_norm": 1.352869987487793, | |
| "learning_rate": 0.0001250848577693671, | |
| "epoch": 0.37, | |
| "step": 24950 | |
| }, | |
| { | |
| "loss": 1.1808, | |
| "grad_norm": 1.1106911897659302, | |
| "learning_rate": 0.00012500976239824578, | |
| "epoch": 0.38, | |
| "step": 24975 | |
| }, | |
| { | |
| "loss": 1.1819, | |
| "grad_norm": 1.2609456777572632, | |
| "learning_rate": 0.00012493466702712447, | |
| "epoch": 0.38, | |
| "step": 25000 | |
| }, | |
| { | |
| "loss": 1.1571, | |
| "grad_norm": 1.3581352233886719, | |
| "learning_rate": 0.00012485957165600314, | |
| "epoch": 0.38, | |
| "step": 25025 | |
| }, | |
| { | |
| "loss": 1.2111, | |
| "grad_norm": 1.7891106605529785, | |
| "learning_rate": 0.0001247844762848818, | |
| "epoch": 0.38, | |
| "step": 25050 | |
| }, | |
| { | |
| "loss": 1.2029, | |
| "grad_norm": 2.628241539001465, | |
| "learning_rate": 0.00012470938091376047, | |
| "epoch": 0.38, | |
| "step": 25075 | |
| }, | |
| { | |
| "loss": 1.1415, | |
| "grad_norm": 1.5528656244277954, | |
| "learning_rate": 0.00012463428554263916, | |
| "epoch": 0.38, | |
| "step": 25100 | |
| }, | |
| { | |
| "loss": 1.0769, | |
| "grad_norm": 2.0100932121276855, | |
| "learning_rate": 0.00012455919017151783, | |
| "epoch": 0.38, | |
| "step": 25125 | |
| }, | |
| { | |
| "loss": 1.171, | |
| "grad_norm": 2.7479538917541504, | |
| "learning_rate": 0.00012448409480039652, | |
| "epoch": 0.38, | |
| "step": 25150 | |
| }, | |
| { | |
| "loss": 1.1868, | |
| "grad_norm": 2.177091360092163, | |
| "learning_rate": 0.00012440899942927518, | |
| "epoch": 0.38, | |
| "step": 25175 | |
| }, | |
| { | |
| "loss": 1.1472, | |
| "grad_norm": 1.9711464643478394, | |
| "learning_rate": 0.00012433390405815385, | |
| "epoch": 0.38, | |
| "step": 25200 | |
| }, | |
| { | |
| "loss": 1.1982, | |
| "grad_norm": 1.4624091386795044, | |
| "learning_rate": 0.00012425880868703254, | |
| "epoch": 0.38, | |
| "step": 25225 | |
| }, | |
| { | |
| "loss": 1.1806, | |
| "grad_norm": 1.7121859788894653, | |
| "learning_rate": 0.0001241837133159112, | |
| "epoch": 0.38, | |
| "step": 25250 | |
| }, | |
| { | |
| "loss": 1.1943, | |
| "grad_norm": 2.1174204349517822, | |
| "learning_rate": 0.0001241086179447899, | |
| "epoch": 0.38, | |
| "step": 25275 | |
| }, | |
| { | |
| "loss": 1.1742, | |
| "grad_norm": 1.2425144910812378, | |
| "learning_rate": 0.00012403352257366857, | |
| "epoch": 0.38, | |
| "step": 25300 | |
| }, | |
| { | |
| "loss": 1.1316, | |
| "grad_norm": 2.102142572402954, | |
| "learning_rate": 0.00012395842720254726, | |
| "epoch": 0.38, | |
| "step": 25325 | |
| }, | |
| { | |
| "loss": 1.1717, | |
| "grad_norm": 1.7592540979385376, | |
| "learning_rate": 0.0001238833318314259, | |
| "epoch": 0.38, | |
| "step": 25350 | |
| }, | |
| { | |
| "loss": 1.2086, | |
| "grad_norm": 1.7676315307617188, | |
| "learning_rate": 0.0001238082364603046, | |
| "epoch": 0.38, | |
| "step": 25375 | |
| }, | |
| { | |
| "loss": 1.1386, | |
| "grad_norm": 1.154153823852539, | |
| "learning_rate": 0.00012373314108918325, | |
| "epoch": 0.38, | |
| "step": 25400 | |
| }, | |
| { | |
| "loss": 1.1803, | |
| "grad_norm": 2.522324800491333, | |
| "learning_rate": 0.00012365804571806195, | |
| "epoch": 0.38, | |
| "step": 25425 | |
| }, | |
| { | |
| "loss": 1.2331, | |
| "grad_norm": 1.699385404586792, | |
| "learning_rate": 0.00012358295034694064, | |
| "epoch": 0.38, | |
| "step": 25450 | |
| }, | |
| { | |
| "loss": 1.2247, | |
| "grad_norm": 1.836391568183899, | |
| "learning_rate": 0.0001235078549758193, | |
| "epoch": 0.38, | |
| "step": 25475 | |
| }, | |
| { | |
| "loss": 1.1509, | |
| "grad_norm": 1.2097364664077759, | |
| "learning_rate": 0.00012343275960469797, | |
| "epoch": 0.38, | |
| "step": 25500 | |
| }, | |
| { | |
| "loss": 1.1488, | |
| "grad_norm": 0.8426992893218994, | |
| "learning_rate": 0.00012335766423357663, | |
| "epoch": 0.38, | |
| "step": 25525 | |
| }, | |
| { | |
| "loss": 1.1434, | |
| "grad_norm": 1.2710751295089722, | |
| "learning_rate": 0.00012328256886245533, | |
| "epoch": 0.38, | |
| "step": 25550 | |
| }, | |
| { | |
| "loss": 1.131, | |
| "grad_norm": 1.567521095275879, | |
| "learning_rate": 0.000123207473491334, | |
| "epoch": 0.38, | |
| "step": 25575 | |
| }, | |
| { | |
| "loss": 1.2268, | |
| "grad_norm": 1.6876307725906372, | |
| "learning_rate": 0.00012313237812021268, | |
| "epoch": 0.38, | |
| "step": 25600 | |
| }, | |
| { | |
| "loss": 1.178, | |
| "grad_norm": 1.5570650100708008, | |
| "learning_rate": 0.00012305728274909135, | |
| "epoch": 0.38, | |
| "step": 25625 | |
| }, | |
| { | |
| "loss": 1.1128, | |
| "grad_norm": 1.9181684255599976, | |
| "learning_rate": 0.00012298218737797002, | |
| "epoch": 0.39, | |
| "step": 25650 | |
| }, | |
| { | |
| "loss": 1.1662, | |
| "grad_norm": 1.4703614711761475, | |
| "learning_rate": 0.0001229070920068487, | |
| "epoch": 0.39, | |
| "step": 25675 | |
| }, | |
| { | |
| "loss": 1.2166, | |
| "grad_norm": 1.1674293279647827, | |
| "learning_rate": 0.00012283199663572737, | |
| "epoch": 0.39, | |
| "step": 25700 | |
| }, | |
| { | |
| "loss": 1.1962, | |
| "grad_norm": 2.910494565963745, | |
| "learning_rate": 0.00012275690126460607, | |
| "epoch": 0.39, | |
| "step": 25725 | |
| }, | |
| { | |
| "loss": 1.1996, | |
| "grad_norm": 1.249042272567749, | |
| "learning_rate": 0.00012268180589348473, | |
| "epoch": 0.39, | |
| "step": 25750 | |
| }, | |
| { | |
| "loss": 1.1962, | |
| "grad_norm": 2.1757421493530273, | |
| "learning_rate": 0.00012260671052236342, | |
| "epoch": 0.39, | |
| "step": 25775 | |
| }, | |
| { | |
| "loss": 1.1302, | |
| "grad_norm": 1.8201817274093628, | |
| "learning_rate": 0.00012253161515124206, | |
| "epoch": 0.39, | |
| "step": 25800 | |
| }, | |
| { | |
| "loss": 1.1242, | |
| "grad_norm": 1.2587064504623413, | |
| "learning_rate": 0.00012245651978012075, | |
| "epoch": 0.39, | |
| "step": 25825 | |
| }, | |
| { | |
| "loss": 1.1353, | |
| "grad_norm": 1.9519400596618652, | |
| "learning_rate": 0.00012238142440899945, | |
| "epoch": 0.39, | |
| "step": 25850 | |
| }, | |
| { | |
| "loss": 1.2128, | |
| "grad_norm": 1.997555136680603, | |
| "learning_rate": 0.0001223063290378781, | |
| "epoch": 0.39, | |
| "step": 25875 | |
| }, | |
| { | |
| "loss": 1.1383, | |
| "grad_norm": 1.9942442178726196, | |
| "learning_rate": 0.0001222312336667568, | |
| "epoch": 0.39, | |
| "step": 25900 | |
| }, | |
| { | |
| "loss": 1.1726, | |
| "grad_norm": 2.1078426837921143, | |
| "learning_rate": 0.00012215613829563547, | |
| "epoch": 0.39, | |
| "step": 25925 | |
| }, | |
| { | |
| "loss": 1.1349, | |
| "grad_norm": 2.8128950595855713, | |
| "learning_rate": 0.00012208104292451413, | |
| "epoch": 0.39, | |
| "step": 25950 | |
| }, | |
| { | |
| "loss": 1.1536, | |
| "grad_norm": 1.986128330230713, | |
| "learning_rate": 0.00012200594755339281, | |
| "epoch": 0.39, | |
| "step": 25975 | |
| }, | |
| { | |
| "loss": 1.1194, | |
| "grad_norm": 1.418022871017456, | |
| "learning_rate": 0.00012193085218227149, | |
| "epoch": 0.39, | |
| "step": 26000 | |
| }, | |
| { | |
| "loss": 1.1819, | |
| "grad_norm": 1.2267699241638184, | |
| "learning_rate": 0.00012185575681115016, | |
| "epoch": 0.39, | |
| "step": 26025 | |
| }, | |
| { | |
| "loss": 1.1222, | |
| "grad_norm": 1.4214072227478027, | |
| "learning_rate": 0.00012178066144002884, | |
| "epoch": 0.39, | |
| "step": 26050 | |
| }, | |
| { | |
| "loss": 1.2028, | |
| "grad_norm": 3.486180543899536, | |
| "learning_rate": 0.00012170556606890753, | |
| "epoch": 0.39, | |
| "step": 26075 | |
| }, | |
| { | |
| "loss": 1.1714, | |
| "grad_norm": 1.6389093399047852, | |
| "learning_rate": 0.0001216304706977862, | |
| "epoch": 0.39, | |
| "step": 26100 | |
| }, | |
| { | |
| "loss": 1.1689, | |
| "grad_norm": 1.5613031387329102, | |
| "learning_rate": 0.00012155537532666487, | |
| "epoch": 0.39, | |
| "step": 26125 | |
| }, | |
| { | |
| "loss": 1.1821, | |
| "grad_norm": 1.5050113201141357, | |
| "learning_rate": 0.00012148027995554354, | |
| "epoch": 0.39, | |
| "step": 26150 | |
| }, | |
| { | |
| "loss": 1.167, | |
| "grad_norm": 1.2190027236938477, | |
| "learning_rate": 0.00012140518458442223, | |
| "epoch": 0.39, | |
| "step": 26175 | |
| }, | |
| { | |
| "loss": 1.2042, | |
| "grad_norm": 1.0376909971237183, | |
| "learning_rate": 0.0001213300892133009, | |
| "epoch": 0.39, | |
| "step": 26200 | |
| }, | |
| { | |
| "loss": 1.1713, | |
| "grad_norm": 1.036734938621521, | |
| "learning_rate": 0.00012126100147186927, | |
| "epoch": 0.39, | |
| "step": 26225 | |
| }, | |
| { | |
| "loss": 1.1867, | |
| "grad_norm": 0.933276355266571, | |
| "learning_rate": 0.00012118890991559282, | |
| "epoch": 0.39, | |
| "step": 26250 | |
| }, | |
| { | |
| "loss": 1.1568, | |
| "grad_norm": 1.8247997760772705, | |
| "learning_rate": 0.00012111381454447148, | |
| "epoch": 0.39, | |
| "step": 26275 | |
| }, | |
| { | |
| "loss": 1.1209, | |
| "grad_norm": 1.7920253276824951, | |
| "learning_rate": 0.00012103871917335017, | |
| "epoch": 0.39, | |
| "step": 26300 | |
| }, | |
| { | |
| "loss": 1.1424, | |
| "grad_norm": 1.558129906654358, | |
| "learning_rate": 0.00012096362380222883, | |
| "epoch": 0.4, | |
| "step": 26325 | |
| }, | |
| { | |
| "loss": 1.1207, | |
| "grad_norm": 2.0236053466796875, | |
| "learning_rate": 0.00012088852843110752, | |
| "epoch": 0.4, | |
| "step": 26350 | |
| }, | |
| { | |
| "loss": 1.1367, | |
| "grad_norm": 2.042004108428955, | |
| "learning_rate": 0.00012081343305998618, | |
| "epoch": 0.4, | |
| "step": 26375 | |
| }, | |
| { | |
| "loss": 1.1823, | |
| "grad_norm": 1.694769024848938, | |
| "learning_rate": 0.00012073833768886486, | |
| "epoch": 0.4, | |
| "step": 26400 | |
| }, | |
| { | |
| "loss": 1.1536, | |
| "grad_norm": 2.398012399673462, | |
| "learning_rate": 0.00012066324231774353, | |
| "epoch": 0.4, | |
| "step": 26425 | |
| }, | |
| { | |
| "loss": 1.2019, | |
| "grad_norm": 3.8714237213134766, | |
| "learning_rate": 0.00012058814694662222, | |
| "epoch": 0.4, | |
| "step": 26450 | |
| }, | |
| { | |
| "loss": 1.2109, | |
| "grad_norm": 2.893437147140503, | |
| "learning_rate": 0.0001205130515755009, | |
| "epoch": 0.4, | |
| "step": 26475 | |
| }, | |
| { | |
| "loss": 1.2099, | |
| "grad_norm": 1.7134922742843628, | |
| "learning_rate": 0.00012043795620437956, | |
| "epoch": 0.4, | |
| "step": 26500 | |
| }, | |
| { | |
| "loss": 1.1989, | |
| "grad_norm": 2.3126907348632812, | |
| "learning_rate": 0.00012036286083325826, | |
| "epoch": 0.4, | |
| "step": 26525 | |
| }, | |
| { | |
| "loss": 1.121, | |
| "grad_norm": 2.5289969444274902, | |
| "learning_rate": 0.00012028776546213691, | |
| "epoch": 0.4, | |
| "step": 26550 | |
| }, | |
| { | |
| "loss": 1.21, | |
| "grad_norm": 2.551736354827881, | |
| "learning_rate": 0.0001202126700910156, | |
| "epoch": 0.4, | |
| "step": 26575 | |
| }, | |
| { | |
| "loss": 1.1433, | |
| "grad_norm": 1.8382607698440552, | |
| "learning_rate": 0.00012013757471989427, | |
| "epoch": 0.4, | |
| "step": 26600 | |
| }, | |
| { | |
| "loss": 1.1714, | |
| "grad_norm": 1.9856308698654175, | |
| "learning_rate": 0.00012006247934877295, | |
| "epoch": 0.4, | |
| "step": 26625 | |
| }, | |
| { | |
| "loss": 1.1646, | |
| "grad_norm": 1.3132210969924927, | |
| "learning_rate": 0.00011998738397765161, | |
| "epoch": 0.4, | |
| "step": 26650 | |
| }, | |
| { | |
| "loss": 1.1394, | |
| "grad_norm": 1.8171156644821167, | |
| "learning_rate": 0.0001199122886065303, | |
| "epoch": 0.4, | |
| "step": 26675 | |
| }, | |
| { | |
| "loss": 1.1494, | |
| "grad_norm": 1.6852163076400757, | |
| "learning_rate": 0.00011983719323540898, | |
| "epoch": 0.4, | |
| "step": 26700 | |
| }, | |
| { | |
| "loss": 1.1522, | |
| "grad_norm": 1.7947680950164795, | |
| "learning_rate": 0.00011976209786428765, | |
| "epoch": 0.4, | |
| "step": 26725 | |
| }, | |
| { | |
| "loss": 1.1794, | |
| "grad_norm": 2.0458626747131348, | |
| "learning_rate": 0.00011968700249316634, | |
| "epoch": 0.4, | |
| "step": 26750 | |
| }, | |
| { | |
| "loss": 1.2013, | |
| "grad_norm": 1.6670138835906982, | |
| "learning_rate": 0.00011961190712204499, | |
| "epoch": 0.4, | |
| "step": 26775 | |
| }, | |
| { | |
| "loss": 1.2052, | |
| "grad_norm": 1.9082565307617188, | |
| "learning_rate": 0.00011953681175092368, | |
| "epoch": 0.4, | |
| "step": 26800 | |
| }, | |
| { | |
| "loss": 1.1175, | |
| "grad_norm": 1.3584920167922974, | |
| "learning_rate": 0.00011946171637980235, | |
| "epoch": 0.4, | |
| "step": 26825 | |
| }, | |
| { | |
| "loss": 1.1264, | |
| "grad_norm": 2.0976293087005615, | |
| "learning_rate": 0.00011938662100868103, | |
| "epoch": 0.4, | |
| "step": 26850 | |
| }, | |
| { | |
| "loss": 1.2392, | |
| "grad_norm": 2.034069776535034, | |
| "learning_rate": 0.00011931152563755969, | |
| "epoch": 0.4, | |
| "step": 26875 | |
| }, | |
| { | |
| "loss": 1.1871, | |
| "grad_norm": 1.4389294385910034, | |
| "learning_rate": 0.00011923643026643839, | |
| "epoch": 0.4, | |
| "step": 26900 | |
| }, | |
| { | |
| "loss": 1.1544, | |
| "grad_norm": 1.7886531352996826, | |
| "learning_rate": 0.00011916133489531706, | |
| "epoch": 0.4, | |
| "step": 26925 | |
| }, | |
| { | |
| "loss": 1.1557, | |
| "grad_norm": 1.1227729320526123, | |
| "learning_rate": 0.00011908623952419573, | |
| "epoch": 0.4, | |
| "step": 26950 | |
| }, | |
| { | |
| "loss": 1.113, | |
| "grad_norm": 1.5332506895065308, | |
| "learning_rate": 0.00011901114415307442, | |
| "epoch": 0.41, | |
| "step": 26975 | |
| }, | |
| { | |
| "loss": 1.2323, | |
| "grad_norm": 1.5316015481948853, | |
| "learning_rate": 0.00011893604878195307, | |
| "epoch": 0.41, | |
| "step": 27000 | |
| }, | |
| { | |
| "loss": 1.1814, | |
| "grad_norm": 1.7715721130371094, | |
| "learning_rate": 0.00011886095341083177, | |
| "epoch": 0.41, | |
| "step": 27025 | |
| }, | |
| { | |
| "loss": 1.117, | |
| "grad_norm": 1.1491894721984863, | |
| "learning_rate": 0.00011878585803971043, | |
| "epoch": 0.41, | |
| "step": 27050 | |
| }, | |
| { | |
| "loss": 1.1541, | |
| "grad_norm": 1.2926276922225952, | |
| "learning_rate": 0.00011871076266858911, | |
| "epoch": 0.41, | |
| "step": 27075 | |
| }, | |
| { | |
| "loss": 1.1933, | |
| "grad_norm": 1.9231313467025757, | |
| "learning_rate": 0.0001186356672974678, | |
| "epoch": 0.41, | |
| "step": 27100 | |
| }, | |
| { | |
| "loss": 1.1409, | |
| "grad_norm": 2.1319782733917236, | |
| "learning_rate": 0.00011856057192634647, | |
| "epoch": 0.41, | |
| "step": 27125 | |
| }, | |
| { | |
| "loss": 1.2189, | |
| "grad_norm": 1.5915454626083374, | |
| "learning_rate": 0.00011848547655522515, | |
| "epoch": 0.41, | |
| "step": 27150 | |
| }, | |
| { | |
| "loss": 1.1335, | |
| "grad_norm": 1.5728384256362915, | |
| "learning_rate": 0.00011841038118410381, | |
| "epoch": 0.41, | |
| "step": 27175 | |
| }, | |
| { | |
| "loss": 1.1534, | |
| "grad_norm": 1.0829964876174927, | |
| "learning_rate": 0.0001183352858129825, | |
| "epoch": 0.41, | |
| "step": 27200 | |
| }, | |
| { | |
| "loss": 1.0959, | |
| "grad_norm": 2.1620664596557617, | |
| "learning_rate": 0.00011826019044186116, | |
| "epoch": 0.41, | |
| "step": 27225 | |
| }, | |
| { | |
| "loss": 1.1316, | |
| "grad_norm": 1.7385821342468262, | |
| "learning_rate": 0.00011818509507073985, | |
| "epoch": 0.41, | |
| "step": 27250 | |
| }, | |
| { | |
| "loss": 1.121, | |
| "grad_norm": 2.2649617195129395, | |
| "learning_rate": 0.00011810999969961851, | |
| "epoch": 0.41, | |
| "step": 27275 | |
| }, | |
| { | |
| "loss": 1.1831, | |
| "grad_norm": 1.98993980884552, | |
| "learning_rate": 0.00011803490432849719, | |
| "epoch": 0.41, | |
| "step": 27300 | |
| }, | |
| { | |
| "loss": 1.1661, | |
| "grad_norm": 1.3478261232376099, | |
| "learning_rate": 0.00011795980895737589, | |
| "epoch": 0.41, | |
| "step": 27325 | |
| }, | |
| { | |
| "loss": 1.1912, | |
| "grad_norm": 1.5697304010391235, | |
| "learning_rate": 0.00011788471358625455, | |
| "epoch": 0.41, | |
| "step": 27350 | |
| }, | |
| { | |
| "loss": 1.2364, | |
| "grad_norm": 1.7027043104171753, | |
| "learning_rate": 0.00011780961821513323, | |
| "epoch": 0.41, | |
| "step": 27375 | |
| }, | |
| { | |
| "loss": 1.1422, | |
| "grad_norm": 1.8551706075668335, | |
| "learning_rate": 0.0001177345228440119, | |
| "epoch": 0.41, | |
| "step": 27400 | |
| }, | |
| { | |
| "loss": 1.1839, | |
| "grad_norm": 1.9152601957321167, | |
| "learning_rate": 0.00011765942747289059, | |
| "epoch": 0.41, | |
| "step": 27425 | |
| }, | |
| { | |
| "loss": 1.201, | |
| "grad_norm": 2.2264368534088135, | |
| "learning_rate": 0.00011758433210176924, | |
| "epoch": 0.41, | |
| "step": 27450 | |
| }, | |
| { | |
| "loss": 1.1721, | |
| "grad_norm": 1.2363280057907104, | |
| "learning_rate": 0.00011750923673064793, | |
| "epoch": 0.41, | |
| "step": 27475 | |
| }, | |
| { | |
| "loss": 1.1533, | |
| "grad_norm": 1.1803810596466064, | |
| "learning_rate": 0.0001174341413595266, | |
| "epoch": 0.41, | |
| "step": 27500 | |
| }, | |
| { | |
| "loss": 1.1379, | |
| "grad_norm": 1.3785597085952759, | |
| "learning_rate": 0.00011735904598840528, | |
| "epoch": 0.41, | |
| "step": 27525 | |
| }, | |
| { | |
| "loss": 1.1466, | |
| "grad_norm": 1.222312331199646, | |
| "learning_rate": 0.00011728395061728397, | |
| "epoch": 0.41, | |
| "step": 27550 | |
| }, | |
| { | |
| "loss": 1.1886, | |
| "grad_norm": 1.3862022161483765, | |
| "learning_rate": 0.00011720885524616263, | |
| "epoch": 0.41, | |
| "step": 27575 | |
| }, | |
| { | |
| "loss": 1.1591, | |
| "grad_norm": 1.8599638938903809, | |
| "learning_rate": 0.00011713375987504131, | |
| "epoch": 0.41, | |
| "step": 27600 | |
| }, | |
| { | |
| "loss": 1.2353, | |
| "grad_norm": 2.57729172706604, | |
| "learning_rate": 0.00011705866450391998, | |
| "epoch": 0.41, | |
| "step": 27625 | |
| }, | |
| { | |
| "loss": 1.1376, | |
| "grad_norm": 1.734212040901184, | |
| "learning_rate": 0.00011698356913279867, | |
| "epoch": 0.42, | |
| "step": 27650 | |
| }, | |
| { | |
| "loss": 1.1869, | |
| "grad_norm": 1.5688458681106567, | |
| "learning_rate": 0.00011690847376167732, | |
| "epoch": 0.42, | |
| "step": 27675 | |
| }, | |
| { | |
| "loss": 1.2175, | |
| "grad_norm": 1.8952748775482178, | |
| "learning_rate": 0.00011683337839055601, | |
| "epoch": 0.42, | |
| "step": 27700 | |
| }, | |
| { | |
| "loss": 1.1552, | |
| "grad_norm": 1.0788073539733887, | |
| "learning_rate": 0.00011675828301943468, | |
| "epoch": 0.42, | |
| "step": 27725 | |
| }, | |
| { | |
| "loss": 1.1604, | |
| "grad_norm": 2.346510410308838, | |
| "learning_rate": 0.00011668318764831336, | |
| "epoch": 0.42, | |
| "step": 27750 | |
| }, | |
| { | |
| "loss": 1.1789, | |
| "grad_norm": 1.775448203086853, | |
| "learning_rate": 0.00011660809227719205, | |
| "epoch": 0.42, | |
| "step": 27775 | |
| }, | |
| { | |
| "loss": 1.1386, | |
| "grad_norm": 2.772287130355835, | |
| "learning_rate": 0.00011653299690607072, | |
| "epoch": 0.42, | |
| "step": 27800 | |
| }, | |
| { | |
| "loss": 1.1659, | |
| "grad_norm": 1.017460584640503, | |
| "learning_rate": 0.0001164579015349494, | |
| "epoch": 0.42, | |
| "step": 27825 | |
| }, | |
| { | |
| "loss": 1.1647, | |
| "grad_norm": 1.9251552820205688, | |
| "learning_rate": 0.00011638280616382806, | |
| "epoch": 0.42, | |
| "step": 27850 | |
| }, | |
| { | |
| "loss": 1.2273, | |
| "grad_norm": 1.44833242893219, | |
| "learning_rate": 0.00011630771079270675, | |
| "epoch": 0.42, | |
| "step": 27875 | |
| }, | |
| { | |
| "loss": 1.2112, | |
| "grad_norm": 1.3559473752975464, | |
| "learning_rate": 0.0001162326154215854, | |
| "epoch": 0.42, | |
| "step": 27900 | |
| }, | |
| { | |
| "loss": 1.219, | |
| "grad_norm": 2.958477258682251, | |
| "learning_rate": 0.0001161575200504641, | |
| "epoch": 0.42, | |
| "step": 27925 | |
| }, | |
| { | |
| "loss": 1.142, | |
| "grad_norm": 1.5322625637054443, | |
| "learning_rate": 0.00011608242467934276, | |
| "epoch": 0.42, | |
| "step": 27950 | |
| }, | |
| { | |
| "loss": 1.1681, | |
| "grad_norm": 1.0819323062896729, | |
| "learning_rate": 0.00011600732930822144, | |
| "epoch": 0.42, | |
| "step": 27975 | |
| }, | |
| { | |
| "loss": 1.1307, | |
| "grad_norm": 1.086040735244751, | |
| "learning_rate": 0.00011593223393710013, | |
| "epoch": 0.42, | |
| "step": 28000 | |
| }, | |
| { | |
| "loss": 1.1872, | |
| "grad_norm": 2.5879430770874023, | |
| "learning_rate": 0.0001158571385659788, | |
| "epoch": 0.42, | |
| "step": 28025 | |
| }, | |
| { | |
| "loss": 1.1294, | |
| "grad_norm": 1.5664997100830078, | |
| "learning_rate": 0.00011578204319485748, | |
| "epoch": 0.42, | |
| "step": 28050 | |
| }, | |
| { | |
| "loss": 1.1441, | |
| "grad_norm": 1.5841997861862183, | |
| "learning_rate": 0.00011570694782373614, | |
| "epoch": 0.42, | |
| "step": 28075 | |
| }, | |
| { | |
| "loss": 1.1663, | |
| "grad_norm": 1.0428881645202637, | |
| "learning_rate": 0.00011563185245261484, | |
| "epoch": 0.42, | |
| "step": 28100 | |
| }, | |
| { | |
| "loss": 1.1412, | |
| "grad_norm": 1.4296401739120483, | |
| "learning_rate": 0.00011555675708149349, | |
| "epoch": 0.42, | |
| "step": 28125 | |
| }, | |
| { | |
| "loss": 1.1706, | |
| "grad_norm": 1.3985766172409058, | |
| "learning_rate": 0.00011548166171037218, | |
| "epoch": 0.42, | |
| "step": 28150 | |
| }, | |
| { | |
| "loss": 1.1258, | |
| "grad_norm": 1.4393442869186401, | |
| "learning_rate": 0.00011540656633925085, | |
| "epoch": 0.42, | |
| "step": 28175 | |
| }, | |
| { | |
| "loss": 1.1365, | |
| "grad_norm": 1.9533663988113403, | |
| "learning_rate": 0.00011533147096812952, | |
| "epoch": 0.42, | |
| "step": 28200 | |
| }, | |
| { | |
| "loss": 1.1164, | |
| "grad_norm": 1.4977903366088867, | |
| "learning_rate": 0.00011525637559700822, | |
| "epoch": 0.42, | |
| "step": 28225 | |
| }, | |
| { | |
| "loss": 1.2153, | |
| "grad_norm": 1.857847809791565, | |
| "learning_rate": 0.00011518128022588688, | |
| "epoch": 0.42, | |
| "step": 28250 | |
| }, | |
| { | |
| "loss": 1.1949, | |
| "grad_norm": 1.774740219116211, | |
| "learning_rate": 0.00011510618485476556, | |
| "epoch": 0.42, | |
| "step": 28275 | |
| }, | |
| { | |
| "loss": 1.1738, | |
| "grad_norm": 1.1024271249771118, | |
| "learning_rate": 0.00011503108948364423, | |
| "epoch": 0.43, | |
| "step": 28300 | |
| }, | |
| { | |
| "loss": 1.1814, | |
| "grad_norm": 3.308375358581543, | |
| "learning_rate": 0.00011495599411252292, | |
| "epoch": 0.43, | |
| "step": 28325 | |
| }, | |
| { | |
| "loss": 1.1684, | |
| "grad_norm": 1.0142186880111694, | |
| "learning_rate": 0.00011488089874140158, | |
| "epoch": 0.43, | |
| "step": 28350 | |
| }, | |
| { | |
| "loss": 1.1109, | |
| "grad_norm": 2.34968638420105, | |
| "learning_rate": 0.00011480580337028026, | |
| "epoch": 0.43, | |
| "step": 28375 | |
| }, | |
| { | |
| "loss": 1.1584, | |
| "grad_norm": 1.1933414936065674, | |
| "learning_rate": 0.00011473070799915895, | |
| "epoch": 0.43, | |
| "step": 28400 | |
| }, | |
| { | |
| "loss": 1.1562, | |
| "grad_norm": 1.6852394342422485, | |
| "learning_rate": 0.0001146556126280376, | |
| "epoch": 0.43, | |
| "step": 28425 | |
| }, | |
| { | |
| "loss": 1.0875, | |
| "grad_norm": 1.276416301727295, | |
| "learning_rate": 0.0001145805172569163, | |
| "epoch": 0.43, | |
| "step": 28450 | |
| }, | |
| { | |
| "loss": 1.2261, | |
| "grad_norm": 2.792825937271118, | |
| "learning_rate": 0.00011450542188579496, | |
| "epoch": 0.43, | |
| "step": 28475 | |
| }, | |
| { | |
| "loss": 1.166, | |
| "grad_norm": 1.2241714000701904, | |
| "learning_rate": 0.00011443032651467364, | |
| "epoch": 0.43, | |
| "step": 28500 | |
| }, | |
| { | |
| "loss": 1.2041, | |
| "grad_norm": 1.3080065250396729, | |
| "learning_rate": 0.00011435523114355231, | |
| "epoch": 0.43, | |
| "step": 28525 | |
| }, | |
| { | |
| "loss": 1.1149, | |
| "grad_norm": 1.7479028701782227, | |
| "learning_rate": 0.000114280135772431, | |
| "epoch": 0.43, | |
| "step": 28550 | |
| }, | |
| { | |
| "loss": 1.1577, | |
| "grad_norm": 2.0920069217681885, | |
| "learning_rate": 0.00011420504040130967, | |
| "epoch": 0.43, | |
| "step": 28575 | |
| }, | |
| { | |
| "loss": 1.1439, | |
| "grad_norm": 1.1147267818450928, | |
| "learning_rate": 0.00011412994503018835, | |
| "epoch": 0.43, | |
| "step": 28600 | |
| }, | |
| { | |
| "loss": 1.1928, | |
| "grad_norm": 1.3617130517959595, | |
| "learning_rate": 0.00011405484965906704, | |
| "epoch": 0.43, | |
| "step": 28625 | |
| }, | |
| { | |
| "loss": 1.1807, | |
| "grad_norm": 1.8022890090942383, | |
| "learning_rate": 0.00011397975428794569, | |
| "epoch": 0.43, | |
| "step": 28650 | |
| }, | |
| { | |
| "loss": 1.1285, | |
| "grad_norm": 1.4314754009246826, | |
| "learning_rate": 0.00011390465891682438, | |
| "epoch": 0.43, | |
| "step": 28675 | |
| }, | |
| { | |
| "loss": 1.2854, | |
| "grad_norm": 1.2290889024734497, | |
| "learning_rate": 0.00011382956354570305, | |
| "epoch": 0.43, | |
| "step": 28700 | |
| }, | |
| { | |
| "loss": 1.1868, | |
| "grad_norm": 1.2961443662643433, | |
| "learning_rate": 0.00011375446817458173, | |
| "epoch": 0.43, | |
| "step": 28725 | |
| }, | |
| { | |
| "loss": 1.1627, | |
| "grad_norm": 1.629899501800537, | |
| "learning_rate": 0.00011367937280346039, | |
| "epoch": 0.43, | |
| "step": 28750 | |
| }, | |
| { | |
| "loss": 1.1232, | |
| "grad_norm": 1.3125689029693604, | |
| "learning_rate": 0.00011360427743233908, | |
| "epoch": 0.43, | |
| "step": 28775 | |
| }, | |
| { | |
| "loss": 1.1396, | |
| "grad_norm": 1.3124148845672607, | |
| "learning_rate": 0.00011352918206121775, | |
| "epoch": 0.43, | |
| "step": 28800 | |
| }, | |
| { | |
| "loss": 1.0702, | |
| "grad_norm": 1.1580018997192383, | |
| "learning_rate": 0.00011345408669009643, | |
| "epoch": 0.43, | |
| "step": 28825 | |
| }, | |
| { | |
| "loss": 1.1338, | |
| "grad_norm": 1.5117197036743164, | |
| "learning_rate": 0.00011337899131897512, | |
| "epoch": 0.43, | |
| "step": 28850 | |
| }, | |
| { | |
| "loss": 1.1742, | |
| "grad_norm": 1.6845176219940186, | |
| "learning_rate": 0.00011330389594785377, | |
| "epoch": 0.43, | |
| "step": 28875 | |
| }, | |
| { | |
| "loss": 1.1828, | |
| "grad_norm": 1.1892350912094116, | |
| "learning_rate": 0.00011322880057673246, | |
| "epoch": 0.43, | |
| "step": 28900 | |
| }, | |
| { | |
| "loss": 1.237, | |
| "grad_norm": 1.562537431716919, | |
| "learning_rate": 0.00011315370520561113, | |
| "epoch": 0.43, | |
| "step": 28925 | |
| }, | |
| { | |
| "loss": 1.193, | |
| "grad_norm": 1.7920253276824951, | |
| "learning_rate": 0.00011307860983448981, | |
| "epoch": 0.43, | |
| "step": 28950 | |
| }, | |
| { | |
| "loss": 1.1734, | |
| "grad_norm": 1.7338802814483643, | |
| "learning_rate": 0.00011300351446336847, | |
| "epoch": 0.44, | |
| "step": 28975 | |
| }, | |
| { | |
| "loss": 1.1254, | |
| "grad_norm": 1.6084978580474854, | |
| "learning_rate": 0.00011292841909224717, | |
| "epoch": 0.44, | |
| "step": 29000 | |
| }, | |
| { | |
| "loss": 1.1564, | |
| "grad_norm": 2.1127138137817383, | |
| "learning_rate": 0.00011285332372112583, | |
| "epoch": 0.44, | |
| "step": 29025 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 2.373610019683838, | |
| "learning_rate": 0.00011277822835000451, | |
| "epoch": 0.44, | |
| "step": 29050 | |
| }, | |
| { | |
| "loss": 1.0381, | |
| "grad_norm": 1.090454339981079, | |
| "learning_rate": 0.0001127031329788832, | |
| "epoch": 0.44, | |
| "step": 29075 | |
| }, | |
| { | |
| "loss": 1.1866, | |
| "grad_norm": 1.2997491359710693, | |
| "learning_rate": 0.00011262803760776185, | |
| "epoch": 0.44, | |
| "step": 29100 | |
| }, | |
| { | |
| "loss": 1.2086, | |
| "grad_norm": 1.9946448802947998, | |
| "learning_rate": 0.00011255294223664055, | |
| "epoch": 0.44, | |
| "step": 29125 | |
| }, | |
| { | |
| "loss": 1.1127, | |
| "grad_norm": 1.276667594909668, | |
| "learning_rate": 0.00011247784686551921, | |
| "epoch": 0.44, | |
| "step": 29150 | |
| }, | |
| { | |
| "loss": 1.2735, | |
| "grad_norm": 1.8735250234603882, | |
| "learning_rate": 0.00011240275149439789, | |
| "epoch": 0.44, | |
| "step": 29175 | |
| }, | |
| { | |
| "loss": 1.1269, | |
| "grad_norm": 1.4805363416671753, | |
| "learning_rate": 0.00011232765612327656, | |
| "epoch": 0.44, | |
| "step": 29200 | |
| }, | |
| { | |
| "loss": 1.1203, | |
| "grad_norm": 1.6462610960006714, | |
| "learning_rate": 0.00011225256075215525, | |
| "epoch": 0.44, | |
| "step": 29225 | |
| }, | |
| { | |
| "loss": 1.1661, | |
| "grad_norm": 1.334406852722168, | |
| "learning_rate": 0.00011217746538103391, | |
| "epoch": 0.44, | |
| "step": 29250 | |
| }, | |
| { | |
| "loss": 1.1603, | |
| "grad_norm": 1.3393394947052002, | |
| "learning_rate": 0.0001121023700099126, | |
| "epoch": 0.44, | |
| "step": 29275 | |
| }, | |
| { | |
| "loss": 1.1295, | |
| "grad_norm": 2.316953420639038, | |
| "learning_rate": 0.00011202727463879129, | |
| "epoch": 0.44, | |
| "step": 29300 | |
| }, | |
| { | |
| "loss": 1.1566, | |
| "grad_norm": 1.7229734659194946, | |
| "learning_rate": 0.00011195217926766994, | |
| "epoch": 0.44, | |
| "step": 29325 | |
| }, | |
| { | |
| "loss": 1.1669, | |
| "grad_norm": 2.08143949508667, | |
| "learning_rate": 0.00011187708389654863, | |
| "epoch": 0.44, | |
| "step": 29350 | |
| }, | |
| { | |
| "loss": 1.1525, | |
| "grad_norm": 2.7917256355285645, | |
| "learning_rate": 0.0001118019885254273, | |
| "epoch": 0.44, | |
| "step": 29375 | |
| }, | |
| { | |
| "loss": 1.1114, | |
| "grad_norm": 1.8444219827651978, | |
| "learning_rate": 0.00011172689315430597, | |
| "epoch": 0.44, | |
| "step": 29400 | |
| }, | |
| { | |
| "loss": 1.1588, | |
| "grad_norm": 1.2194463014602661, | |
| "learning_rate": 0.00011165179778318464, | |
| "epoch": 0.44, | |
| "step": 29425 | |
| }, | |
| { | |
| "loss": 1.2405, | |
| "grad_norm": 1.1201077699661255, | |
| "learning_rate": 0.00011157670241206333, | |
| "epoch": 0.44, | |
| "step": 29450 | |
| }, | |
| { | |
| "loss": 1.2188, | |
| "grad_norm": 2.771019220352173, | |
| "learning_rate": 0.000111501607040942, | |
| "epoch": 0.44, | |
| "step": 29475 | |
| }, | |
| { | |
| "loss": 1.1978, | |
| "grad_norm": 2.0680384635925293, | |
| "learning_rate": 0.00011142651166982068, | |
| "epoch": 0.44, | |
| "step": 29500 | |
| }, | |
| { | |
| "loss": 1.1814, | |
| "grad_norm": 1.2148905992507935, | |
| "learning_rate": 0.00011135141629869937, | |
| "epoch": 0.44, | |
| "step": 29525 | |
| }, | |
| { | |
| "loss": 1.1382, | |
| "grad_norm": 1.3024623394012451, | |
| "learning_rate": 0.00011127632092757802, | |
| "epoch": 0.44, | |
| "step": 29550 | |
| }, | |
| { | |
| "loss": 1.173, | |
| "grad_norm": 1.3196483850479126, | |
| "learning_rate": 0.00011120122555645671, | |
| "epoch": 0.44, | |
| "step": 29575 | |
| }, | |
| { | |
| "loss": 1.2608, | |
| "grad_norm": 1.9761130809783936, | |
| "learning_rate": 0.00011112613018533538, | |
| "epoch": 0.44, | |
| "step": 29600 | |
| }, | |
| { | |
| "loss": 1.1977, | |
| "grad_norm": 2.152472734451294, | |
| "learning_rate": 0.00011105103481421406, | |
| "epoch": 0.44, | |
| "step": 29625 | |
| }, | |
| { | |
| "loss": 1.2164, | |
| "grad_norm": 1.2230114936828613, | |
| "learning_rate": 0.00011097593944309272, | |
| "epoch": 0.45, | |
| "step": 29650 | |
| }, | |
| { | |
| "loss": 1.1368, | |
| "grad_norm": 1.2674063444137573, | |
| "learning_rate": 0.00011090084407197141, | |
| "epoch": 0.45, | |
| "step": 29675 | |
| }, | |
| { | |
| "loss": 1.1773, | |
| "grad_norm": 1.7089192867279053, | |
| "learning_rate": 0.00011082574870085008, | |
| "epoch": 0.45, | |
| "step": 29700 | |
| }, | |
| { | |
| "loss": 1.2058, | |
| "grad_norm": 1.6862412691116333, | |
| "learning_rate": 0.00011075065332972876, | |
| "epoch": 0.45, | |
| "step": 29725 | |
| }, | |
| { | |
| "loss": 1.1363, | |
| "grad_norm": 1.8428794145584106, | |
| "learning_rate": 0.00011067555795860745, | |
| "epoch": 0.45, | |
| "step": 29750 | |
| }, | |
| { | |
| "loss": 1.1239, | |
| "grad_norm": 1.7620809078216553, | |
| "learning_rate": 0.0001106004625874861, | |
| "epoch": 0.45, | |
| "step": 29775 | |
| }, | |
| { | |
| "loss": 1.1175, | |
| "grad_norm": 2.246371269226074, | |
| "learning_rate": 0.0001105253672163648, | |
| "epoch": 0.45, | |
| "step": 29800 | |
| }, | |
| { | |
| "loss": 1.1432, | |
| "grad_norm": 1.3259189128875732, | |
| "learning_rate": 0.00011045027184524346, | |
| "epoch": 0.45, | |
| "step": 29825 | |
| }, | |
| { | |
| "loss": 1.1352, | |
| "grad_norm": 1.642720103263855, | |
| "learning_rate": 0.00011037517647412214, | |
| "epoch": 0.45, | |
| "step": 29850 | |
| }, | |
| { | |
| "loss": 1.1642, | |
| "grad_norm": 1.3091384172439575, | |
| "learning_rate": 0.0001103000811030008, | |
| "epoch": 0.45, | |
| "step": 29875 | |
| }, | |
| { | |
| "loss": 1.155, | |
| "grad_norm": 1.44764244556427, | |
| "learning_rate": 0.0001102249857318795, | |
| "epoch": 0.45, | |
| "step": 29900 | |
| }, | |
| { | |
| "loss": 1.1144, | |
| "grad_norm": 3.290072441101074, | |
| "learning_rate": 0.00011014989036075818, | |
| "epoch": 0.45, | |
| "step": 29925 | |
| }, | |
| { | |
| "loss": 1.1852, | |
| "grad_norm": 1.8344993591308594, | |
| "learning_rate": 0.00011007479498963684, | |
| "epoch": 0.45, | |
| "step": 29950 | |
| }, | |
| { | |
| "loss": 1.2341, | |
| "grad_norm": 1.0677040815353394, | |
| "learning_rate": 0.00010999969961851553, | |
| "epoch": 0.45, | |
| "step": 29975 | |
| }, | |
| { | |
| "loss": 1.1522, | |
| "grad_norm": 1.430322527885437, | |
| "learning_rate": 0.00010992460424739419, | |
| "epoch": 0.45, | |
| "step": 30000 | |
| }, | |
| { | |
| "loss": 1.1885, | |
| "grad_norm": 2.407017230987549, | |
| "learning_rate": 0.00010984950887627288, | |
| "epoch": 0.45, | |
| "step": 30025 | |
| }, | |
| { | |
| "loss": 1.2033, | |
| "grad_norm": 1.9406884908676147, | |
| "learning_rate": 0.00010977441350515154, | |
| "epoch": 0.45, | |
| "step": 30050 | |
| }, | |
| { | |
| "loss": 1.1372, | |
| "grad_norm": 2.1446497440338135, | |
| "learning_rate": 0.00010969931813403022, | |
| "epoch": 0.45, | |
| "step": 30075 | |
| }, | |
| { | |
| "loss": 1.1924, | |
| "grad_norm": 1.4735894203186035, | |
| "learning_rate": 0.00010962422276290889, | |
| "epoch": 0.45, | |
| "step": 30100 | |
| }, | |
| { | |
| "loss": 1.1923, | |
| "grad_norm": 1.4889634847640991, | |
| "learning_rate": 0.00010954912739178758, | |
| "epoch": 0.45, | |
| "step": 30125 | |
| }, | |
| { | |
| "loss": 1.1588, | |
| "grad_norm": 1.8243343830108643, | |
| "learning_rate": 0.00010947403202066626, | |
| "epoch": 0.45, | |
| "step": 30150 | |
| }, | |
| { | |
| "loss": 1.1383, | |
| "grad_norm": 1.3423229455947876, | |
| "learning_rate": 0.00010939893664954492, | |
| "epoch": 0.45, | |
| "step": 30175 | |
| }, | |
| { | |
| "loss": 1.1461, | |
| "grad_norm": 2.072646141052246, | |
| "learning_rate": 0.00010932384127842362, | |
| "epoch": 0.45, | |
| "step": 30200 | |
| }, | |
| { | |
| "loss": 1.2188, | |
| "grad_norm": 2.139387845993042, | |
| "learning_rate": 0.00010924874590730227, | |
| "epoch": 0.45, | |
| "step": 30225 | |
| }, | |
| { | |
| "loss": 1.1639, | |
| "grad_norm": 6.252641677856445, | |
| "learning_rate": 0.00010917365053618096, | |
| "epoch": 0.45, | |
| "step": 30250 | |
| }, | |
| { | |
| "loss": 1.1542, | |
| "grad_norm": 1.225797176361084, | |
| "learning_rate": 0.00010909855516505963, | |
| "epoch": 0.45, | |
| "step": 30275 | |
| }, | |
| { | |
| "loss": 1.1593, | |
| "grad_norm": 1.5084859132766724, | |
| "learning_rate": 0.0001090234597939383, | |
| "epoch": 0.46, | |
| "step": 30300 | |
| }, | |
| { | |
| "loss": 1.1765, | |
| "grad_norm": 1.0552685260772705, | |
| "learning_rate": 0.00010894836442281697, | |
| "epoch": 0.46, | |
| "step": 30325 | |
| }, | |
| { | |
| "loss": 1.1944, | |
| "grad_norm": 3.4387400150299072, | |
| "learning_rate": 0.00010887326905169566, | |
| "epoch": 0.46, | |
| "step": 30350 | |
| }, | |
| { | |
| "loss": 1.0752, | |
| "grad_norm": 1.3896501064300537, | |
| "learning_rate": 0.00010879817368057434, | |
| "epoch": 0.46, | |
| "step": 30375 | |
| }, | |
| { | |
| "loss": 1.1576, | |
| "grad_norm": 1.6324450969696045, | |
| "learning_rate": 0.00010872307830945301, | |
| "epoch": 0.46, | |
| "step": 30400 | |
| }, | |
| { | |
| "loss": 1.1854, | |
| "grad_norm": 2.059718132019043, | |
| "learning_rate": 0.0001086479829383317, | |
| "epoch": 0.46, | |
| "step": 30425 | |
| }, | |
| { | |
| "loss": 1.1918, | |
| "grad_norm": 1.7998640537261963, | |
| "learning_rate": 0.00010857288756721035, | |
| "epoch": 0.46, | |
| "step": 30450 | |
| }, | |
| { | |
| "loss": 1.186, | |
| "grad_norm": 1.9032535552978516, | |
| "learning_rate": 0.00010849779219608904, | |
| "epoch": 0.46, | |
| "step": 30475 | |
| }, | |
| { | |
| "loss": 1.1738, | |
| "grad_norm": 1.7081289291381836, | |
| "learning_rate": 0.00010842269682496771, | |
| "epoch": 0.46, | |
| "step": 30500 | |
| }, | |
| { | |
| "loss": 1.1147, | |
| "grad_norm": 1.2194355726242065, | |
| "learning_rate": 0.00010834760145384639, | |
| "epoch": 0.46, | |
| "step": 30525 | |
| }, | |
| { | |
| "loss": 1.0827, | |
| "grad_norm": 1.396530032157898, | |
| "learning_rate": 0.00010827250608272505, | |
| "epoch": 0.46, | |
| "step": 30550 | |
| }, | |
| { | |
| "loss": 1.1544, | |
| "grad_norm": 2.1990020275115967, | |
| "learning_rate": 0.00010819741071160375, | |
| "epoch": 0.46, | |
| "step": 30575 | |
| }, | |
| { | |
| "loss": 1.1533, | |
| "grad_norm": 1.4652187824249268, | |
| "learning_rate": 0.00010812231534048242, | |
| "epoch": 0.46, | |
| "step": 30600 | |
| }, | |
| { | |
| "loss": 1.1761, | |
| "grad_norm": 1.4150506258010864, | |
| "learning_rate": 0.00010804721996936109, | |
| "epoch": 0.46, | |
| "step": 30625 | |
| }, | |
| { | |
| "loss": 1.2356, | |
| "grad_norm": 1.5214896202087402, | |
| "learning_rate": 0.00010797212459823978, | |
| "epoch": 0.46, | |
| "step": 30650 | |
| }, | |
| { | |
| "loss": 1.1893, | |
| "grad_norm": 1.470495581626892, | |
| "learning_rate": 0.00010789702922711843, | |
| "epoch": 0.46, | |
| "step": 30675 | |
| }, | |
| { | |
| "loss": 1.1708, | |
| "grad_norm": 2.846820592880249, | |
| "learning_rate": 0.00010782193385599713, | |
| "epoch": 0.46, | |
| "step": 30700 | |
| }, | |
| { | |
| "loss": 1.1546, | |
| "grad_norm": 1.4119728803634644, | |
| "learning_rate": 0.00010774683848487579, | |
| "epoch": 0.46, | |
| "step": 30725 | |
| }, | |
| { | |
| "loss": 1.1822, | |
| "grad_norm": 1.4061907529830933, | |
| "learning_rate": 0.00010767174311375447, | |
| "epoch": 0.46, | |
| "step": 30750 | |
| }, | |
| { | |
| "loss": 1.1481, | |
| "grad_norm": 1.3078978061676025, | |
| "learning_rate": 0.00010759664774263314, | |
| "epoch": 0.46, | |
| "step": 30775 | |
| }, | |
| { | |
| "loss": 1.1322, | |
| "grad_norm": 2.0098421573638916, | |
| "learning_rate": 0.00010752155237151183, | |
| "epoch": 0.46, | |
| "step": 30800 | |
| }, | |
| { | |
| "loss": 1.1943, | |
| "grad_norm": 2.3420894145965576, | |
| "learning_rate": 0.00010744645700039051, | |
| "epoch": 0.46, | |
| "step": 30825 | |
| }, | |
| { | |
| "loss": 1.1382, | |
| "grad_norm": 2.183663845062256, | |
| "learning_rate": 0.00010737136162926917, | |
| "epoch": 0.46, | |
| "step": 30850 | |
| }, | |
| { | |
| "loss": 1.2107, | |
| "grad_norm": 1.6581045389175415, | |
| "learning_rate": 0.00010729626625814786, | |
| "epoch": 0.46, | |
| "step": 30875 | |
| }, | |
| { | |
| "loss": 1.2586, | |
| "grad_norm": 1.961310625076294, | |
| "learning_rate": 0.00010722117088702652, | |
| "epoch": 0.46, | |
| "step": 30900 | |
| }, | |
| { | |
| "loss": 1.1607, | |
| "grad_norm": 1.231471061706543, | |
| "learning_rate": 0.00010714607551590521, | |
| "epoch": 0.46, | |
| "step": 30925 | |
| }, | |
| { | |
| "loss": 1.1467, | |
| "grad_norm": 1.653730869293213, | |
| "learning_rate": 0.00010707098014478387, | |
| "epoch": 0.46, | |
| "step": 30950 | |
| }, | |
| { | |
| "loss": 1.2346, | |
| "grad_norm": 1.830336332321167, | |
| "learning_rate": 0.00010699588477366255, | |
| "epoch": 0.47, | |
| "step": 30975 | |
| }, | |
| { | |
| "loss": 1.174, | |
| "grad_norm": 1.4249459505081177, | |
| "learning_rate": 0.00010692078940254122, | |
| "epoch": 0.47, | |
| "step": 31000 | |
| }, | |
| { | |
| "loss": 1.1379, | |
| "grad_norm": 1.7390903234481812, | |
| "learning_rate": 0.00010684569403141991, | |
| "epoch": 0.47, | |
| "step": 31025 | |
| }, | |
| { | |
| "loss": 1.2185, | |
| "grad_norm": 1.3198795318603516, | |
| "learning_rate": 0.00010677059866029859, | |
| "epoch": 0.47, | |
| "step": 31050 | |
| }, | |
| { | |
| "loss": 1.1644, | |
| "grad_norm": 1.7585688829421997, | |
| "learning_rate": 0.00010669550328917725, | |
| "epoch": 0.47, | |
| "step": 31075 | |
| }, | |
| { | |
| "loss": 1.2051, | |
| "grad_norm": 1.4614295959472656, | |
| "learning_rate": 0.00010662040791805595, | |
| "epoch": 0.47, | |
| "step": 31100 | |
| }, | |
| { | |
| "loss": 1.0994, | |
| "grad_norm": 2.1233184337615967, | |
| "learning_rate": 0.0001065453125469346, | |
| "epoch": 0.47, | |
| "step": 31125 | |
| }, | |
| { | |
| "loss": 1.1336, | |
| "grad_norm": 2.0219411849975586, | |
| "learning_rate": 0.00010647021717581329, | |
| "epoch": 0.47, | |
| "step": 31150 | |
| }, | |
| { | |
| "loss": 1.1349, | |
| "grad_norm": 1.2599328756332397, | |
| "learning_rate": 0.00010639512180469196, | |
| "epoch": 0.47, | |
| "step": 31175 | |
| }, | |
| { | |
| "loss": 1.2062, | |
| "grad_norm": 1.209994912147522, | |
| "learning_rate": 0.00010632002643357064, | |
| "epoch": 0.47, | |
| "step": 31200 | |
| }, | |
| { | |
| "loss": 1.1555, | |
| "grad_norm": 1.5804765224456787, | |
| "learning_rate": 0.00010624493106244933, | |
| "epoch": 0.47, | |
| "step": 31225 | |
| }, | |
| { | |
| "loss": 1.2051, | |
| "grad_norm": 1.906879186630249, | |
| "learning_rate": 0.000106169835691328, | |
| "epoch": 0.47, | |
| "step": 31250 | |
| }, | |
| { | |
| "loss": 1.2132, | |
| "grad_norm": 1.4314424991607666, | |
| "learning_rate": 0.00010609474032020667, | |
| "epoch": 0.47, | |
| "step": 31275 | |
| }, | |
| { | |
| "loss": 1.2105, | |
| "grad_norm": 1.4528160095214844, | |
| "learning_rate": 0.00010601964494908534, | |
| "epoch": 0.47, | |
| "step": 31300 | |
| }, | |
| { | |
| "loss": 1.2308, | |
| "grad_norm": 1.5849334001541138, | |
| "learning_rate": 0.00010594454957796403, | |
| "epoch": 0.47, | |
| "step": 31325 | |
| }, | |
| { | |
| "loss": 1.1983, | |
| "grad_norm": 1.6990954875946045, | |
| "learning_rate": 0.0001058694542068427, | |
| "epoch": 0.47, | |
| "step": 31350 | |
| }, | |
| { | |
| "loss": 1.1251, | |
| "grad_norm": 1.3091074228286743, | |
| "learning_rate": 0.00010579435883572137, | |
| "epoch": 0.47, | |
| "step": 31375 | |
| }, | |
| { | |
| "loss": 1.1784, | |
| "grad_norm": 2.4281911849975586, | |
| "learning_rate": 0.00010571926346460004, | |
| "epoch": 0.47, | |
| "step": 31400 | |
| }, | |
| { | |
| "loss": 1.1874, | |
| "grad_norm": 1.9910012483596802, | |
| "learning_rate": 0.00010564416809347872, | |
| "epoch": 0.47, | |
| "step": 31425 | |
| }, | |
| { | |
| "loss": 1.2311, | |
| "grad_norm": 2.100861072540283, | |
| "learning_rate": 0.00010556907272235741, | |
| "epoch": 0.47, | |
| "step": 31450 | |
| }, | |
| { | |
| "loss": 1.1173, | |
| "grad_norm": 1.6685750484466553, | |
| "learning_rate": 0.00010549397735123608, | |
| "epoch": 0.47, | |
| "step": 31475 | |
| }, | |
| { | |
| "loss": 1.1874, | |
| "grad_norm": 3.5001275539398193, | |
| "learning_rate": 0.00010541888198011476, | |
| "epoch": 0.47, | |
| "step": 31500 | |
| }, | |
| { | |
| "loss": 1.1384, | |
| "grad_norm": 1.6073639392852783, | |
| "learning_rate": 0.00010534378660899342, | |
| "epoch": 0.47, | |
| "step": 31525 | |
| }, | |
| { | |
| "loss": 1.2495, | |
| "grad_norm": 1.9744518995285034, | |
| "learning_rate": 0.00010526869123787211, | |
| "epoch": 0.47, | |
| "step": 31550 | |
| }, | |
| { | |
| "loss": 1.1328, | |
| "grad_norm": 1.4878309965133667, | |
| "learning_rate": 0.00010519359586675078, | |
| "epoch": 0.47, | |
| "step": 31575 | |
| }, | |
| { | |
| "loss": 1.2093, | |
| "grad_norm": 3.258043050765991, | |
| "learning_rate": 0.00010511850049562946, | |
| "epoch": 0.47, | |
| "step": 31600 | |
| }, | |
| { | |
| "loss": 1.1802, | |
| "grad_norm": 2.012786865234375, | |
| "learning_rate": 0.00010504340512450812, | |
| "epoch": 0.47, | |
| "step": 31625 | |
| }, | |
| { | |
| "loss": 1.096, | |
| "grad_norm": 1.3581587076187134, | |
| "learning_rate": 0.0001049683097533868, | |
| "epoch": 0.48, | |
| "step": 31650 | |
| }, | |
| { | |
| "loss": 1.1548, | |
| "grad_norm": 1.2571851015090942, | |
| "learning_rate": 0.0001048932143822655, | |
| "epoch": 0.48, | |
| "step": 31675 | |
| }, | |
| { | |
| "loss": 1.1615, | |
| "grad_norm": 1.5408381223678589, | |
| "learning_rate": 0.00010481811901114416, | |
| "epoch": 0.48, | |
| "step": 31700 | |
| }, | |
| { | |
| "loss": 1.1491, | |
| "grad_norm": 2.3489863872528076, | |
| "learning_rate": 0.00010474302364002284, | |
| "epoch": 0.48, | |
| "step": 31725 | |
| }, | |
| { | |
| "loss": 1.174, | |
| "grad_norm": 1.5670727491378784, | |
| "learning_rate": 0.0001046679282689015, | |
| "epoch": 0.48, | |
| "step": 31750 | |
| }, | |
| { | |
| "loss": 1.1087, | |
| "grad_norm": 1.6657809019088745, | |
| "learning_rate": 0.0001045928328977802, | |
| "epoch": 0.48, | |
| "step": 31775 | |
| }, | |
| { | |
| "loss": 1.1351, | |
| "grad_norm": 2.1541805267333984, | |
| "learning_rate": 0.00010451773752665886, | |
| "epoch": 0.48, | |
| "step": 31800 | |
| }, | |
| { | |
| "loss": 1.0992, | |
| "grad_norm": 1.6802806854248047, | |
| "learning_rate": 0.00010444264215553754, | |
| "epoch": 0.48, | |
| "step": 31825 | |
| }, | |
| { | |
| "loss": 1.2207, | |
| "grad_norm": 1.513509750366211, | |
| "learning_rate": 0.0001043675467844162, | |
| "epoch": 0.48, | |
| "step": 31850 | |
| }, | |
| { | |
| "loss": 1.1775, | |
| "grad_norm": 1.223694920539856, | |
| "learning_rate": 0.00010429245141329488, | |
| "epoch": 0.48, | |
| "step": 31875 | |
| }, | |
| { | |
| "loss": 1.1863, | |
| "grad_norm": 1.8998793363571167, | |
| "learning_rate": 0.00010421735604217358, | |
| "epoch": 0.48, | |
| "step": 31900 | |
| }, | |
| { | |
| "loss": 1.1404, | |
| "grad_norm": 2.1678850650787354, | |
| "learning_rate": 0.00010414226067105224, | |
| "epoch": 0.48, | |
| "step": 31925 | |
| }, | |
| { | |
| "loss": 1.1979, | |
| "grad_norm": 1.7826672792434692, | |
| "learning_rate": 0.00010406716529993092, | |
| "epoch": 0.48, | |
| "step": 31950 | |
| }, | |
| { | |
| "loss": 1.2179, | |
| "grad_norm": 2.165457248687744, | |
| "learning_rate": 0.00010399206992880959, | |
| "epoch": 0.48, | |
| "step": 31975 | |
| }, | |
| { | |
| "loss": 1.1489, | |
| "grad_norm": 1.6185364723205566, | |
| "learning_rate": 0.00010391697455768828, | |
| "epoch": 0.48, | |
| "step": 32000 | |
| }, | |
| { | |
| "loss": 1.1699, | |
| "grad_norm": 1.2954517602920532, | |
| "learning_rate": 0.00010384187918656694, | |
| "epoch": 0.48, | |
| "step": 32025 | |
| }, | |
| { | |
| "loss": 1.2003, | |
| "grad_norm": 1.919216275215149, | |
| "learning_rate": 0.00010376678381544562, | |
| "epoch": 0.48, | |
| "step": 32050 | |
| }, | |
| { | |
| "loss": 1.1411, | |
| "grad_norm": 1.045401692390442, | |
| "learning_rate": 0.00010369168844432429, | |
| "epoch": 0.48, | |
| "step": 32075 | |
| }, | |
| { | |
| "loss": 1.1166, | |
| "grad_norm": 1.860318660736084, | |
| "learning_rate": 0.00010361659307320297, | |
| "epoch": 0.48, | |
| "step": 32100 | |
| }, | |
| { | |
| "loss": 1.2215, | |
| "grad_norm": 0.9368788599967957, | |
| "learning_rate": 0.00010354149770208166, | |
| "epoch": 0.48, | |
| "step": 32125 | |
| }, | |
| { | |
| "loss": 1.1697, | |
| "grad_norm": 1.7075835466384888, | |
| "learning_rate": 0.00010346640233096032, | |
| "epoch": 0.48, | |
| "step": 32150 | |
| }, | |
| { | |
| "loss": 1.1934, | |
| "grad_norm": 1.1933406591415405, | |
| "learning_rate": 0.000103391306959839, | |
| "epoch": 0.48, | |
| "step": 32175 | |
| }, | |
| { | |
| "loss": 1.2051, | |
| "grad_norm": 1.681666612625122, | |
| "learning_rate": 0.00010331621158871767, | |
| "epoch": 0.48, | |
| "step": 32200 | |
| }, | |
| { | |
| "loss": 1.1314, | |
| "grad_norm": 2.0042386054992676, | |
| "learning_rate": 0.00010324111621759636, | |
| "epoch": 0.48, | |
| "step": 32225 | |
| }, | |
| { | |
| "loss": 1.1505, | |
| "grad_norm": 1.976456880569458, | |
| "learning_rate": 0.00010316602084647503, | |
| "epoch": 0.48, | |
| "step": 32250 | |
| }, | |
| { | |
| "loss": 1.1234, | |
| "grad_norm": 1.852589726448059, | |
| "learning_rate": 0.0001030909254753537, | |
| "epoch": 0.48, | |
| "step": 32275 | |
| }, | |
| { | |
| "loss": 1.1631, | |
| "grad_norm": 1.812740445137024, | |
| "learning_rate": 0.00010301583010423237, | |
| "epoch": 0.49, | |
| "step": 32300 | |
| }, | |
| { | |
| "loss": 1.2414, | |
| "grad_norm": 1.424230694770813, | |
| "learning_rate": 0.00010294073473311105, | |
| "epoch": 0.49, | |
| "step": 32325 | |
| }, | |
| { | |
| "loss": 1.1732, | |
| "grad_norm": 1.4877756834030151, | |
| "learning_rate": 0.00010286563936198974, | |
| "epoch": 0.49, | |
| "step": 32350 | |
| }, | |
| { | |
| "loss": 1.0968, | |
| "grad_norm": 0.8852760195732117, | |
| "learning_rate": 0.00010279054399086841, | |
| "epoch": 0.49, | |
| "step": 32375 | |
| }, | |
| { | |
| "loss": 1.1685, | |
| "grad_norm": 1.217244029045105, | |
| "learning_rate": 0.00010271544861974709, | |
| "epoch": 0.49, | |
| "step": 32400 | |
| }, | |
| { | |
| "loss": 1.2013, | |
| "grad_norm": 1.0668590068817139, | |
| "learning_rate": 0.00010264035324862575, | |
| "epoch": 0.49, | |
| "step": 32425 | |
| }, | |
| { | |
| "loss": 1.2111, | |
| "grad_norm": 2.8531405925750732, | |
| "learning_rate": 0.00010256525787750444, | |
| "epoch": 0.49, | |
| "step": 32450 | |
| }, | |
| { | |
| "loss": 1.1358, | |
| "grad_norm": 1.3973661661148071, | |
| "learning_rate": 0.00010249016250638311, | |
| "epoch": 0.49, | |
| "step": 32475 | |
| }, | |
| { | |
| "loss": 1.1817, | |
| "grad_norm": 1.641974925994873, | |
| "learning_rate": 0.00010241506713526179, | |
| "epoch": 0.49, | |
| "step": 32500 | |
| }, | |
| { | |
| "loss": 1.162, | |
| "grad_norm": 1.5248854160308838, | |
| "learning_rate": 0.00010233997176414048, | |
| "epoch": 0.49, | |
| "step": 32525 | |
| }, | |
| { | |
| "loss": 1.1503, | |
| "grad_norm": 1.7267481088638306, | |
| "learning_rate": 0.00010226487639301913, | |
| "epoch": 0.49, | |
| "step": 32550 | |
| }, | |
| { | |
| "loss": 1.1459, | |
| "grad_norm": 1.0616050958633423, | |
| "learning_rate": 0.00010218978102189782, | |
| "epoch": 0.49, | |
| "step": 32575 | |
| }, | |
| { | |
| "loss": 1.0668, | |
| "grad_norm": 3.3019354343414307, | |
| "learning_rate": 0.00010211468565077649, | |
| "epoch": 0.49, | |
| "step": 32600 | |
| }, | |
| { | |
| "loss": 1.1959, | |
| "grad_norm": 0.9270702004432678, | |
| "learning_rate": 0.00010203959027965517, | |
| "epoch": 0.49, | |
| "step": 32625 | |
| }, | |
| { | |
| "loss": 1.1643, | |
| "grad_norm": 1.6093809604644775, | |
| "learning_rate": 0.00010196449490853383, | |
| "epoch": 0.49, | |
| "step": 32650 | |
| }, | |
| { | |
| "loss": 1.1398, | |
| "grad_norm": 1.0636630058288574, | |
| "learning_rate": 0.00010188939953741253, | |
| "epoch": 0.49, | |
| "step": 32675 | |
| }, | |
| { | |
| "loss": 1.1828, | |
| "grad_norm": 1.4841707944869995, | |
| "learning_rate": 0.00010181430416629119, | |
| "epoch": 0.49, | |
| "step": 32700 | |
| }, | |
| { | |
| "loss": 1.2011, | |
| "grad_norm": 1.9186432361602783, | |
| "learning_rate": 0.00010173920879516987, | |
| "epoch": 0.49, | |
| "step": 32725 | |
| }, | |
| { | |
| "loss": 1.1309, | |
| "grad_norm": 1.3214590549468994, | |
| "learning_rate": 0.00010166411342404856, | |
| "epoch": 0.49, | |
| "step": 32750 | |
| }, | |
| { | |
| "loss": 1.1553, | |
| "grad_norm": 1.2666594982147217, | |
| "learning_rate": 0.00010158901805292721, | |
| "epoch": 0.49, | |
| "step": 32775 | |
| }, | |
| { | |
| "loss": 1.1451, | |
| "grad_norm": 1.2383131980895996, | |
| "learning_rate": 0.00010151392268180591, | |
| "epoch": 0.49, | |
| "step": 32800 | |
| }, | |
| { | |
| "loss": 1.1737, | |
| "grad_norm": 1.585282564163208, | |
| "learning_rate": 0.00010143882731068457, | |
| "epoch": 0.49, | |
| "step": 32825 | |
| }, | |
| { | |
| "loss": 1.189, | |
| "grad_norm": 2.2569665908813477, | |
| "learning_rate": 0.00010136373193956325, | |
| "epoch": 0.49, | |
| "step": 32850 | |
| }, | |
| { | |
| "loss": 1.1316, | |
| "grad_norm": 1.6479202508926392, | |
| "learning_rate": 0.00010128863656844192, | |
| "epoch": 0.49, | |
| "step": 32875 | |
| }, | |
| { | |
| "loss": 1.2062, | |
| "grad_norm": 1.6843442916870117, | |
| "learning_rate": 0.00010121354119732061, | |
| "epoch": 0.49, | |
| "step": 32900 | |
| }, | |
| { | |
| "loss": 1.216, | |
| "grad_norm": 1.4396450519561768, | |
| "learning_rate": 0.00010113844582619927, | |
| "epoch": 0.49, | |
| "step": 32925 | |
| }, | |
| { | |
| "loss": 1.1504, | |
| "grad_norm": 2.33687162399292, | |
| "learning_rate": 0.00010106335045507795, | |
| "epoch": 0.49, | |
| "step": 32950 | |
| }, | |
| { | |
| "loss": 1.1324, | |
| "grad_norm": 1.071869969367981, | |
| "learning_rate": 0.00010098825508395665, | |
| "epoch": 0.5, | |
| "step": 32975 | |
| }, | |
| { | |
| "loss": 1.1709, | |
| "grad_norm": 1.5846800804138184, | |
| "learning_rate": 0.0001009131597128353, | |
| "epoch": 0.5, | |
| "step": 33000 | |
| }, | |
| { | |
| "loss": 1.1503, | |
| "grad_norm": 2.1538047790527344, | |
| "learning_rate": 0.00010083806434171399, | |
| "epoch": 0.5, | |
| "step": 33025 | |
| }, | |
| { | |
| "loss": 1.1561, | |
| "grad_norm": 1.5584303140640259, | |
| "learning_rate": 0.00010076296897059266, | |
| "epoch": 0.5, | |
| "step": 33050 | |
| }, | |
| { | |
| "loss": 1.156, | |
| "grad_norm": 1.192090392112732, | |
| "learning_rate": 0.00010068787359947133, | |
| "epoch": 0.5, | |
| "step": 33075 | |
| }, | |
| { | |
| "loss": 1.1814, | |
| "grad_norm": 1.8236268758773804, | |
| "learning_rate": 0.00010061277822835, | |
| "epoch": 0.5, | |
| "step": 33100 | |
| }, | |
| { | |
| "loss": 1.1463, | |
| "grad_norm": 1.067664384841919, | |
| "learning_rate": 0.00010053768285722869, | |
| "epoch": 0.5, | |
| "step": 33125 | |
| }, | |
| { | |
| "loss": 1.151, | |
| "grad_norm": 2.6256847381591797, | |
| "learning_rate": 0.00010046258748610736, | |
| "epoch": 0.5, | |
| "step": 33150 | |
| }, | |
| { | |
| "loss": 1.2417, | |
| "grad_norm": 2.632324695587158, | |
| "learning_rate": 0.00010038749211498604, | |
| "epoch": 0.5, | |
| "step": 33175 | |
| }, | |
| { | |
| "loss": 1.2562, | |
| "grad_norm": 1.5089225769042969, | |
| "learning_rate": 0.00010031239674386473, | |
| "epoch": 0.5, | |
| "step": 33200 | |
| }, | |
| { | |
| "loss": 1.1732, | |
| "grad_norm": 2.253978967666626, | |
| "learning_rate": 0.00010023730137274338, | |
| "epoch": 0.5, | |
| "step": 33225 | |
| }, | |
| { | |
| "loss": 1.1754, | |
| "grad_norm": 1.3430489301681519, | |
| "learning_rate": 0.00010016220600162207, | |
| "epoch": 0.5, | |
| "step": 33250 | |
| }, | |
| { | |
| "loss": 1.1398, | |
| "grad_norm": 1.406375765800476, | |
| "learning_rate": 0.00010008711063050074, | |
| "epoch": 0.5, | |
| "step": 33275 | |
| }, | |
| { | |
| "loss": 1.1073, | |
| "grad_norm": 1.3083038330078125, | |
| "learning_rate": 0.00010001201525937942, | |
| "epoch": 0.5, | |
| "step": 33300 | |
| }, | |
| { | |
| "loss": 1.1303, | |
| "grad_norm": 1.0769158601760864, | |
| "learning_rate": 9.99369198882581e-05, | |
| "epoch": 0.5, | |
| "step": 33325 | |
| }, | |
| { | |
| "loss": 1.1311, | |
| "grad_norm": 1.7924445867538452, | |
| "learning_rate": 9.986182451713677e-05, | |
| "epoch": 0.5, | |
| "step": 33350 | |
| }, | |
| { | |
| "loss": 1.1369, | |
| "grad_norm": 2.062908411026001, | |
| "learning_rate": 9.978672914601544e-05, | |
| "epoch": 0.5, | |
| "step": 33375 | |
| }, | |
| { | |
| "loss": 1.0535, | |
| "grad_norm": 1.7589771747589111, | |
| "learning_rate": 9.971163377489412e-05, | |
| "epoch": 0.5, | |
| "step": 33400 | |
| }, | |
| { | |
| "loss": 1.2129, | |
| "grad_norm": 2.6144607067108154, | |
| "learning_rate": 9.96365384037728e-05, | |
| "epoch": 0.5, | |
| "step": 33425 | |
| }, | |
| { | |
| "loss": 1.1676, | |
| "grad_norm": 1.4699029922485352, | |
| "learning_rate": 9.956144303265146e-05, | |
| "epoch": 0.5, | |
| "step": 33450 | |
| }, | |
| { | |
| "loss": 1.1679, | |
| "grad_norm": 2.1169466972351074, | |
| "learning_rate": 9.948634766153014e-05, | |
| "epoch": 0.5, | |
| "step": 33475 | |
| }, | |
| { | |
| "loss": 1.1909, | |
| "grad_norm": 2.1322262287139893, | |
| "learning_rate": 9.941125229040883e-05, | |
| "epoch": 0.5, | |
| "step": 33500 | |
| }, | |
| { | |
| "loss": 1.1367, | |
| "grad_norm": 1.2691850662231445, | |
| "learning_rate": 9.93361569192875e-05, | |
| "epoch": 0.5, | |
| "step": 33525 | |
| }, | |
| { | |
| "loss": 1.1963, | |
| "grad_norm": 1.6899739503860474, | |
| "learning_rate": 9.926106154816618e-05, | |
| "epoch": 0.5, | |
| "step": 33550 | |
| }, | |
| { | |
| "loss": 1.174, | |
| "grad_norm": 2.2241880893707275, | |
| "learning_rate": 9.918596617704486e-05, | |
| "epoch": 0.5, | |
| "step": 33575 | |
| }, | |
| { | |
| "loss": 1.0904, | |
| "grad_norm": 1.1701431274414062, | |
| "learning_rate": 9.911087080592352e-05, | |
| "epoch": 0.5, | |
| "step": 33600 | |
| }, | |
| { | |
| "loss": 1.1726, | |
| "grad_norm": 2.3160314559936523, | |
| "learning_rate": 9.90357754348022e-05, | |
| "epoch": 0.51, | |
| "step": 33625 | |
| }, | |
| { | |
| "loss": 1.2542, | |
| "grad_norm": 1.301832675933838, | |
| "learning_rate": 9.896068006368088e-05, | |
| "epoch": 0.51, | |
| "step": 33650 | |
| }, | |
| { | |
| "loss": 1.1653, | |
| "grad_norm": 2.0493037700653076, | |
| "learning_rate": 9.888558469255955e-05, | |
| "epoch": 0.51, | |
| "step": 33675 | |
| }, | |
| { | |
| "loss": 1.1477, | |
| "grad_norm": 1.5900102853775024, | |
| "learning_rate": 9.881048932143822e-05, | |
| "epoch": 0.51, | |
| "step": 33700 | |
| }, | |
| { | |
| "loss": 1.2375, | |
| "grad_norm": 1.2943110466003418, | |
| "learning_rate": 9.873539395031692e-05, | |
| "epoch": 0.51, | |
| "step": 33725 | |
| }, | |
| { | |
| "loss": 1.1826, | |
| "grad_norm": 1.2338217496871948, | |
| "learning_rate": 9.866029857919558e-05, | |
| "epoch": 0.51, | |
| "step": 33750 | |
| }, | |
| { | |
| "loss": 1.0829, | |
| "grad_norm": 1.4232094287872314, | |
| "learning_rate": 9.858520320807426e-05, | |
| "epoch": 0.51, | |
| "step": 33775 | |
| }, | |
| { | |
| "loss": 1.1526, | |
| "grad_norm": 1.8396953344345093, | |
| "learning_rate": 9.851010783695294e-05, | |
| "epoch": 0.51, | |
| "step": 33800 | |
| }, | |
| { | |
| "loss": 1.1136, | |
| "grad_norm": 2.607694625854492, | |
| "learning_rate": 9.84350124658316e-05, | |
| "epoch": 0.51, | |
| "step": 33825 | |
| }, | |
| { | |
| "loss": 1.1214, | |
| "grad_norm": 1.8624433279037476, | |
| "learning_rate": 9.835991709471028e-05, | |
| "epoch": 0.51, | |
| "step": 33850 | |
| }, | |
| { | |
| "loss": 1.1397, | |
| "grad_norm": 0.9864051938056946, | |
| "learning_rate": 9.828482172358896e-05, | |
| "epoch": 0.51, | |
| "step": 33875 | |
| }, | |
| { | |
| "loss": 1.1547, | |
| "grad_norm": 2.811202049255371, | |
| "learning_rate": 9.820972635246763e-05, | |
| "epoch": 0.51, | |
| "step": 33900 | |
| }, | |
| { | |
| "loss": 1.1279, | |
| "grad_norm": 1.2450023889541626, | |
| "learning_rate": 9.813463098134631e-05, | |
| "epoch": 0.51, | |
| "step": 33925 | |
| }, | |
| { | |
| "loss": 1.1307, | |
| "grad_norm": 1.43215811252594, | |
| "learning_rate": 9.8059535610225e-05, | |
| "epoch": 0.51, | |
| "step": 33950 | |
| }, | |
| { | |
| "loss": 1.2241, | |
| "grad_norm": 2.1191306114196777, | |
| "learning_rate": 9.798444023910366e-05, | |
| "epoch": 0.51, | |
| "step": 33975 | |
| }, | |
| { | |
| "loss": 1.0813, | |
| "grad_norm": 1.793713092803955, | |
| "learning_rate": 9.790934486798234e-05, | |
| "epoch": 0.51, | |
| "step": 34000 | |
| }, | |
| { | |
| "loss": 1.1351, | |
| "grad_norm": 1.3615442514419556, | |
| "learning_rate": 9.783424949686102e-05, | |
| "epoch": 0.51, | |
| "step": 34025 | |
| }, | |
| { | |
| "loss": 1.1081, | |
| "grad_norm": 1.8019038438796997, | |
| "learning_rate": 9.775915412573969e-05, | |
| "epoch": 0.51, | |
| "step": 34050 | |
| }, | |
| { | |
| "loss": 1.1063, | |
| "grad_norm": 2.837644100189209, | |
| "learning_rate": 9.768405875461837e-05, | |
| "epoch": 0.51, | |
| "step": 34075 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 1.2097357511520386, | |
| "learning_rate": 9.760896338349705e-05, | |
| "epoch": 0.51, | |
| "step": 34100 | |
| }, | |
| { | |
| "loss": 1.1942, | |
| "grad_norm": 1.1675305366516113, | |
| "learning_rate": 9.753386801237571e-05, | |
| "epoch": 0.51, | |
| "step": 34125 | |
| }, | |
| { | |
| "loss": 1.1185, | |
| "grad_norm": 1.949704885482788, | |
| "learning_rate": 9.745877264125439e-05, | |
| "epoch": 0.51, | |
| "step": 34150 | |
| }, | |
| { | |
| "loss": 1.1904, | |
| "grad_norm": 1.5967111587524414, | |
| "learning_rate": 9.738367727013308e-05, | |
| "epoch": 0.51, | |
| "step": 34175 | |
| }, | |
| { | |
| "loss": 1.0594, | |
| "grad_norm": 0.8626694083213806, | |
| "learning_rate": 9.730858189901175e-05, | |
| "epoch": 0.51, | |
| "step": 34200 | |
| }, | |
| { | |
| "loss": 1.1637, | |
| "grad_norm": 1.7055697441101074, | |
| "learning_rate": 9.723348652789043e-05, | |
| "epoch": 0.51, | |
| "step": 34225 | |
| }, | |
| { | |
| "loss": 1.1241, | |
| "grad_norm": 1.559312105178833, | |
| "learning_rate": 9.71583911567691e-05, | |
| "epoch": 0.51, | |
| "step": 34250 | |
| }, | |
| { | |
| "loss": 1.1188, | |
| "grad_norm": 1.095895767211914, | |
| "learning_rate": 9.708329578564777e-05, | |
| "epoch": 0.51, | |
| "step": 34275 | |
| }, | |
| { | |
| "loss": 1.1647, | |
| "grad_norm": 1.85615074634552, | |
| "learning_rate": 9.700820041452645e-05, | |
| "epoch": 0.52, | |
| "step": 34300 | |
| }, | |
| { | |
| "loss": 1.2034, | |
| "grad_norm": 1.3546233177185059, | |
| "learning_rate": 9.693310504340513e-05, | |
| "epoch": 0.52, | |
| "step": 34325 | |
| }, | |
| { | |
| "loss": 1.1603, | |
| "grad_norm": 1.756169080734253, | |
| "learning_rate": 9.68580096722838e-05, | |
| "epoch": 0.52, | |
| "step": 34350 | |
| }, | |
| { | |
| "loss": 1.1961, | |
| "grad_norm": 1.8548756837844849, | |
| "learning_rate": 9.678291430116249e-05, | |
| "epoch": 0.52, | |
| "step": 34375 | |
| }, | |
| { | |
| "loss": 1.1465, | |
| "grad_norm": 1.8958889245986938, | |
| "learning_rate": 9.670781893004116e-05, | |
| "epoch": 0.52, | |
| "step": 34400 | |
| }, | |
| { | |
| "loss": 1.1634, | |
| "grad_norm": 1.0698497295379639, | |
| "learning_rate": 9.663272355891983e-05, | |
| "epoch": 0.52, | |
| "step": 34425 | |
| }, | |
| { | |
| "loss": 1.1787, | |
| "grad_norm": 1.7011641263961792, | |
| "learning_rate": 9.655762818779851e-05, | |
| "epoch": 0.52, | |
| "step": 34450 | |
| }, | |
| { | |
| "loss": 1.2188, | |
| "grad_norm": 1.8526285886764526, | |
| "learning_rate": 9.648253281667719e-05, | |
| "epoch": 0.52, | |
| "step": 34475 | |
| }, | |
| { | |
| "loss": 1.1691, | |
| "grad_norm": 1.3944551944732666, | |
| "learning_rate": 9.640743744555585e-05, | |
| "epoch": 0.52, | |
| "step": 34500 | |
| }, | |
| { | |
| "loss": 1.2114, | |
| "grad_norm": 1.9814480543136597, | |
| "learning_rate": 9.633234207443453e-05, | |
| "epoch": 0.52, | |
| "step": 34525 | |
| }, | |
| { | |
| "loss": 1.2095, | |
| "grad_norm": 2.6231672763824463, | |
| "learning_rate": 9.625724670331321e-05, | |
| "epoch": 0.52, | |
| "step": 34550 | |
| }, | |
| { | |
| "loss": 1.0963, | |
| "grad_norm": 1.4759525060653687, | |
| "learning_rate": 9.618215133219189e-05, | |
| "epoch": 0.52, | |
| "step": 34575 | |
| }, | |
| { | |
| "loss": 1.1529, | |
| "grad_norm": 1.029731273651123, | |
| "learning_rate": 9.610705596107057e-05, | |
| "epoch": 0.52, | |
| "step": 34600 | |
| }, | |
| { | |
| "loss": 1.1502, | |
| "grad_norm": 1.3262224197387695, | |
| "learning_rate": 9.603196058994925e-05, | |
| "epoch": 0.52, | |
| "step": 34625 | |
| }, | |
| { | |
| "loss": 1.1467, | |
| "grad_norm": 2.3982503414154053, | |
| "learning_rate": 9.595686521882791e-05, | |
| "epoch": 0.52, | |
| "step": 34650 | |
| }, | |
| { | |
| "loss": 1.1648, | |
| "grad_norm": 1.6530815362930298, | |
| "learning_rate": 9.588176984770659e-05, | |
| "epoch": 0.52, | |
| "step": 34675 | |
| }, | |
| { | |
| "loss": 1.1066, | |
| "grad_norm": 1.987338662147522, | |
| "learning_rate": 9.580667447658527e-05, | |
| "epoch": 0.52, | |
| "step": 34700 | |
| }, | |
| { | |
| "loss": 1.1234, | |
| "grad_norm": 1.4502911567687988, | |
| "learning_rate": 9.573157910546394e-05, | |
| "epoch": 0.52, | |
| "step": 34725 | |
| }, | |
| { | |
| "loss": 1.1992, | |
| "grad_norm": 2.197833776473999, | |
| "learning_rate": 9.565648373434261e-05, | |
| "epoch": 0.52, | |
| "step": 34750 | |
| }, | |
| { | |
| "loss": 1.1252, | |
| "grad_norm": 1.55136239528656, | |
| "learning_rate": 9.55813883632213e-05, | |
| "epoch": 0.52, | |
| "step": 34775 | |
| }, | |
| { | |
| "loss": 1.2176, | |
| "grad_norm": 3.1894094944000244, | |
| "learning_rate": 9.550629299209997e-05, | |
| "epoch": 0.52, | |
| "step": 34800 | |
| }, | |
| { | |
| "loss": 1.232, | |
| "grad_norm": 1.971354603767395, | |
| "learning_rate": 9.543119762097865e-05, | |
| "epoch": 0.52, | |
| "step": 34825 | |
| }, | |
| { | |
| "loss": 1.1468, | |
| "grad_norm": 2.7667553424835205, | |
| "learning_rate": 9.535610224985733e-05, | |
| "epoch": 0.52, | |
| "step": 34850 | |
| }, | |
| { | |
| "loss": 1.0825, | |
| "grad_norm": 1.0196588039398193, | |
| "learning_rate": 9.5281006878736e-05, | |
| "epoch": 0.52, | |
| "step": 34875 | |
| }, | |
| { | |
| "loss": 1.1179, | |
| "grad_norm": 1.2213141918182373, | |
| "learning_rate": 9.520591150761467e-05, | |
| "epoch": 0.52, | |
| "step": 34900 | |
| }, | |
| { | |
| "loss": 1.1399, | |
| "grad_norm": 1.981288194656372, | |
| "learning_rate": 9.513081613649335e-05, | |
| "epoch": 0.52, | |
| "step": 34925 | |
| }, | |
| { | |
| "loss": 1.1343, | |
| "grad_norm": 1.6537185907363892, | |
| "learning_rate": 9.505572076537202e-05, | |
| "epoch": 0.52, | |
| "step": 34950 | |
| }, | |
| { | |
| "loss": 1.0935, | |
| "grad_norm": 1.4479026794433594, | |
| "learning_rate": 9.49806253942507e-05, | |
| "epoch": 0.53, | |
| "step": 34975 | |
| }, | |
| { | |
| "loss": 1.1032, | |
| "grad_norm": 1.706716537475586, | |
| "learning_rate": 9.490553002312938e-05, | |
| "epoch": 0.53, | |
| "step": 35000 | |
| }, | |
| { | |
| "loss": 1.2447, | |
| "grad_norm": 1.594125747680664, | |
| "learning_rate": 9.483043465200806e-05, | |
| "epoch": 0.53, | |
| "step": 35025 | |
| }, | |
| { | |
| "loss": 1.1596, | |
| "grad_norm": 1.97038996219635, | |
| "learning_rate": 9.475533928088673e-05, | |
| "epoch": 0.53, | |
| "step": 35050 | |
| }, | |
| { | |
| "loss": 1.0628, | |
| "grad_norm": 1.643943428993225, | |
| "learning_rate": 9.468024390976541e-05, | |
| "epoch": 0.53, | |
| "step": 35075 | |
| }, | |
| { | |
| "loss": 1.1548, | |
| "grad_norm": 1.9907810688018799, | |
| "learning_rate": 9.460514853864408e-05, | |
| "epoch": 0.53, | |
| "step": 35100 | |
| }, | |
| { | |
| "loss": 1.1677, | |
| "grad_norm": 1.1716595888137817, | |
| "learning_rate": 9.453005316752276e-05, | |
| "epoch": 0.53, | |
| "step": 35125 | |
| }, | |
| { | |
| "loss": 1.1908, | |
| "grad_norm": 2.2323215007781982, | |
| "learning_rate": 9.445495779640144e-05, | |
| "epoch": 0.53, | |
| "step": 35150 | |
| }, | |
| { | |
| "loss": 1.1572, | |
| "grad_norm": 1.4074227809906006, | |
| "learning_rate": 9.43798624252801e-05, | |
| "epoch": 0.53, | |
| "step": 35175 | |
| }, | |
| { | |
| "loss": 1.167, | |
| "grad_norm": 4.070502281188965, | |
| "learning_rate": 9.430476705415878e-05, | |
| "epoch": 0.53, | |
| "step": 35200 | |
| }, | |
| { | |
| "loss": 1.1227, | |
| "grad_norm": 1.3812352418899536, | |
| "learning_rate": 9.422967168303746e-05, | |
| "epoch": 0.53, | |
| "step": 35225 | |
| }, | |
| { | |
| "loss": 1.1987, | |
| "grad_norm": 1.3674787282943726, | |
| "learning_rate": 9.415457631191614e-05, | |
| "epoch": 0.53, | |
| "step": 35250 | |
| }, | |
| { | |
| "loss": 1.1032, | |
| "grad_norm": 1.9738848209381104, | |
| "learning_rate": 9.407948094079482e-05, | |
| "epoch": 0.53, | |
| "step": 35275 | |
| }, | |
| { | |
| "loss": 1.1589, | |
| "grad_norm": 1.1826382875442505, | |
| "learning_rate": 9.40043855696735e-05, | |
| "epoch": 0.53, | |
| "step": 35300 | |
| }, | |
| { | |
| "loss": 1.1984, | |
| "grad_norm": 2.014425277709961, | |
| "learning_rate": 9.392929019855216e-05, | |
| "epoch": 0.53, | |
| "step": 35325 | |
| }, | |
| { | |
| "loss": 1.1509, | |
| "grad_norm": 1.1934667825698853, | |
| "learning_rate": 9.385419482743084e-05, | |
| "epoch": 0.53, | |
| "step": 35350 | |
| }, | |
| { | |
| "loss": 1.2054, | |
| "grad_norm": 1.2793216705322266, | |
| "learning_rate": 9.377909945630952e-05, | |
| "epoch": 0.53, | |
| "step": 35375 | |
| }, | |
| { | |
| "loss": 1.1462, | |
| "grad_norm": 1.46218740940094, | |
| "learning_rate": 9.370400408518818e-05, | |
| "epoch": 0.53, | |
| "step": 35400 | |
| }, | |
| { | |
| "loss": 1.09, | |
| "grad_norm": 1.2986016273498535, | |
| "learning_rate": 9.362890871406686e-05, | |
| "epoch": 0.53, | |
| "step": 35425 | |
| }, | |
| { | |
| "loss": 1.1911, | |
| "grad_norm": 1.3429057598114014, | |
| "learning_rate": 9.355381334294554e-05, | |
| "epoch": 0.53, | |
| "step": 35450 | |
| }, | |
| { | |
| "loss": 1.0894, | |
| "grad_norm": 2.130441904067993, | |
| "learning_rate": 9.347871797182422e-05, | |
| "epoch": 0.53, | |
| "step": 35475 | |
| }, | |
| { | |
| "loss": 1.0808, | |
| "grad_norm": 2.666227102279663, | |
| "learning_rate": 9.34036226007029e-05, | |
| "epoch": 0.53, | |
| "step": 35500 | |
| }, | |
| { | |
| "loss": 1.1766, | |
| "grad_norm": 2.273437023162842, | |
| "learning_rate": 9.332852722958158e-05, | |
| "epoch": 0.53, | |
| "step": 35525 | |
| }, | |
| { | |
| "loss": 1.1705, | |
| "grad_norm": 2.2733075618743896, | |
| "learning_rate": 9.325343185846024e-05, | |
| "epoch": 0.53, | |
| "step": 35550 | |
| }, | |
| { | |
| "loss": 1.1519, | |
| "grad_norm": 3.6471107006073, | |
| "learning_rate": 9.317833648733892e-05, | |
| "epoch": 0.53, | |
| "step": 35575 | |
| }, | |
| { | |
| "loss": 1.1314, | |
| "grad_norm": 1.2116317749023438, | |
| "learning_rate": 9.31032411162176e-05, | |
| "epoch": 0.53, | |
| "step": 35600 | |
| }, | |
| { | |
| "loss": 1.2151, | |
| "grad_norm": 2.057880163192749, | |
| "learning_rate": 9.302814574509627e-05, | |
| "epoch": 0.54, | |
| "step": 35625 | |
| }, | |
| { | |
| "loss": 1.104, | |
| "grad_norm": 1.8840137720108032, | |
| "learning_rate": 9.295305037397495e-05, | |
| "epoch": 0.54, | |
| "step": 35650 | |
| }, | |
| { | |
| "loss": 1.153, | |
| "grad_norm": 1.324926733970642, | |
| "learning_rate": 9.287795500285364e-05, | |
| "epoch": 0.54, | |
| "step": 35675 | |
| }, | |
| { | |
| "loss": 1.1716, | |
| "grad_norm": 1.6749731302261353, | |
| "learning_rate": 9.28028596317323e-05, | |
| "epoch": 0.54, | |
| "step": 35700 | |
| }, | |
| { | |
| "loss": 1.2032, | |
| "grad_norm": 2.625720977783203, | |
| "learning_rate": 9.272776426061098e-05, | |
| "epoch": 0.54, | |
| "step": 35725 | |
| }, | |
| { | |
| "loss": 1.1532, | |
| "grad_norm": 1.7075999975204468, | |
| "learning_rate": 9.265266888948966e-05, | |
| "epoch": 0.54, | |
| "step": 35750 | |
| }, | |
| { | |
| "loss": 1.099, | |
| "grad_norm": 2.0305819511413574, | |
| "learning_rate": 9.257757351836833e-05, | |
| "epoch": 0.54, | |
| "step": 35775 | |
| }, | |
| { | |
| "loss": 1.1765, | |
| "grad_norm": 1.1253600120544434, | |
| "learning_rate": 9.2502478147247e-05, | |
| "epoch": 0.54, | |
| "step": 35800 | |
| }, | |
| { | |
| "loss": 1.1198, | |
| "grad_norm": 1.2533643245697021, | |
| "learning_rate": 9.242738277612568e-05, | |
| "epoch": 0.54, | |
| "step": 35825 | |
| }, | |
| { | |
| "loss": 1.1672, | |
| "grad_norm": 2.331897735595703, | |
| "learning_rate": 9.235228740500435e-05, | |
| "epoch": 0.54, | |
| "step": 35850 | |
| }, | |
| { | |
| "loss": 1.1543, | |
| "grad_norm": 1.556606411933899, | |
| "learning_rate": 9.227719203388303e-05, | |
| "epoch": 0.54, | |
| "step": 35875 | |
| }, | |
| { | |
| "loss": 1.143, | |
| "grad_norm": 2.3850412368774414, | |
| "learning_rate": 9.220209666276172e-05, | |
| "epoch": 0.54, | |
| "step": 35900 | |
| }, | |
| { | |
| "loss": 1.1789, | |
| "grad_norm": 2.364520788192749, | |
| "learning_rate": 9.212700129164039e-05, | |
| "epoch": 0.54, | |
| "step": 35925 | |
| }, | |
| { | |
| "loss": 1.1408, | |
| "grad_norm": 1.6768343448638916, | |
| "learning_rate": 9.205190592051907e-05, | |
| "epoch": 0.54, | |
| "step": 35950 | |
| }, | |
| { | |
| "loss": 1.2382, | |
| "grad_norm": 1.6469035148620605, | |
| "learning_rate": 9.197681054939774e-05, | |
| "epoch": 0.54, | |
| "step": 35975 | |
| }, | |
| { | |
| "loss": 1.1741, | |
| "grad_norm": 1.1769710779190063, | |
| "learning_rate": 9.190171517827641e-05, | |
| "epoch": 0.54, | |
| "step": 36000 | |
| }, | |
| { | |
| "loss": 1.1858, | |
| "grad_norm": 1.3249248266220093, | |
| "learning_rate": 9.182661980715509e-05, | |
| "epoch": 0.54, | |
| "step": 36025 | |
| }, | |
| { | |
| "loss": 1.1672, | |
| "grad_norm": 1.8996431827545166, | |
| "learning_rate": 9.175152443603377e-05, | |
| "epoch": 0.54, | |
| "step": 36050 | |
| }, | |
| { | |
| "loss": 1.1392, | |
| "grad_norm": 1.7952135801315308, | |
| "learning_rate": 9.167642906491245e-05, | |
| "epoch": 0.54, | |
| "step": 36075 | |
| }, | |
| { | |
| "loss": 1.1508, | |
| "grad_norm": 1.642858862876892, | |
| "learning_rate": 9.160133369379111e-05, | |
| "epoch": 0.54, | |
| "step": 36100 | |
| }, | |
| { | |
| "loss": 1.1122, | |
| "grad_norm": 1.287514090538025, | |
| "learning_rate": 9.15262383226698e-05, | |
| "epoch": 0.54, | |
| "step": 36125 | |
| }, | |
| { | |
| "loss": 1.0894, | |
| "grad_norm": 1.7376103401184082, | |
| "learning_rate": 9.145114295154847e-05, | |
| "epoch": 0.54, | |
| "step": 36150 | |
| }, | |
| { | |
| "loss": 1.0989, | |
| "grad_norm": 1.9539545774459839, | |
| "learning_rate": 9.137604758042715e-05, | |
| "epoch": 0.54, | |
| "step": 36175 | |
| }, | |
| { | |
| "loss": 1.2123, | |
| "grad_norm": 1.42177414894104, | |
| "learning_rate": 9.130095220930583e-05, | |
| "epoch": 0.54, | |
| "step": 36200 | |
| }, | |
| { | |
| "loss": 1.178, | |
| "grad_norm": 1.0218976736068726, | |
| "learning_rate": 9.122585683818449e-05, | |
| "epoch": 0.54, | |
| "step": 36225 | |
| }, | |
| { | |
| "loss": 1.1677, | |
| "grad_norm": 2.1340959072113037, | |
| "learning_rate": 9.115076146706317e-05, | |
| "epoch": 0.54, | |
| "step": 36250 | |
| }, | |
| { | |
| "loss": 1.1543, | |
| "grad_norm": 1.4234435558319092, | |
| "learning_rate": 9.107866991078671e-05, | |
| "epoch": 0.54, | |
| "step": 36275 | |
| }, | |
| { | |
| "loss": 1.1862, | |
| "grad_norm": 2.7633371353149414, | |
| "learning_rate": 9.100357453966538e-05, | |
| "epoch": 0.55, | |
| "step": 36300 | |
| }, | |
| { | |
| "loss": 1.1253, | |
| "grad_norm": 1.0972270965576172, | |
| "learning_rate": 9.092847916854406e-05, | |
| "epoch": 0.55, | |
| "step": 36325 | |
| }, | |
| { | |
| "loss": 1.1722, | |
| "grad_norm": 1.9171634912490845, | |
| "learning_rate": 9.085338379742274e-05, | |
| "epoch": 0.55, | |
| "step": 36350 | |
| }, | |
| { | |
| "loss": 1.2198, | |
| "grad_norm": 1.712023138999939, | |
| "learning_rate": 9.07782884263014e-05, | |
| "epoch": 0.55, | |
| "step": 36375 | |
| }, | |
| { | |
| "loss": 1.1141, | |
| "grad_norm": 1.8943400382995605, | |
| "learning_rate": 9.070319305518008e-05, | |
| "epoch": 0.55, | |
| "step": 36400 | |
| }, | |
| { | |
| "loss": 1.14, | |
| "grad_norm": 2.2785110473632812, | |
| "learning_rate": 9.062809768405876e-05, | |
| "epoch": 0.55, | |
| "step": 36425 | |
| }, | |
| { | |
| "loss": 1.1121, | |
| "grad_norm": 1.8687163591384888, | |
| "learning_rate": 9.055300231293742e-05, | |
| "epoch": 0.55, | |
| "step": 36450 | |
| }, | |
| { | |
| "loss": 1.1212, | |
| "grad_norm": 1.2206839323043823, | |
| "learning_rate": 9.047790694181612e-05, | |
| "epoch": 0.55, | |
| "step": 36475 | |
| }, | |
| { | |
| "loss": 1.1483, | |
| "grad_norm": 1.451223611831665, | |
| "learning_rate": 9.04028115706948e-05, | |
| "epoch": 0.55, | |
| "step": 36500 | |
| }, | |
| { | |
| "loss": 1.1291, | |
| "grad_norm": 1.4097929000854492, | |
| "learning_rate": 9.032771619957346e-05, | |
| "epoch": 0.55, | |
| "step": 36525 | |
| }, | |
| { | |
| "loss": 1.212, | |
| "grad_norm": 1.2845067977905273, | |
| "learning_rate": 9.025262082845214e-05, | |
| "epoch": 0.55, | |
| "step": 36550 | |
| }, | |
| { | |
| "loss": 1.1501, | |
| "grad_norm": 1.6853928565979004, | |
| "learning_rate": 9.017752545733082e-05, | |
| "epoch": 0.55, | |
| "step": 36575 | |
| }, | |
| { | |
| "loss": 1.1752, | |
| "grad_norm": 1.6147632598876953, | |
| "learning_rate": 9.010243008620948e-05, | |
| "epoch": 0.55, | |
| "step": 36600 | |
| }, | |
| { | |
| "loss": 1.0684, | |
| "grad_norm": 1.1251403093338013, | |
| "learning_rate": 9.002733471508816e-05, | |
| "epoch": 0.55, | |
| "step": 36625 | |
| }, | |
| { | |
| "loss": 1.182, | |
| "grad_norm": 2.0793652534484863, | |
| "learning_rate": 8.995223934396684e-05, | |
| "epoch": 0.55, | |
| "step": 36650 | |
| }, | |
| { | |
| "loss": 1.1353, | |
| "grad_norm": 1.7693026065826416, | |
| "learning_rate": 8.987714397284552e-05, | |
| "epoch": 0.55, | |
| "step": 36675 | |
| }, | |
| { | |
| "loss": 1.1888, | |
| "grad_norm": 2.8078482151031494, | |
| "learning_rate": 8.98020486017242e-05, | |
| "epoch": 0.55, | |
| "step": 36700 | |
| }, | |
| { | |
| "loss": 1.1771, | |
| "grad_norm": 1.857947587966919, | |
| "learning_rate": 8.972695323060288e-05, | |
| "epoch": 0.55, | |
| "step": 36725 | |
| }, | |
| { | |
| "loss": 1.1906, | |
| "grad_norm": 1.8160065412521362, | |
| "learning_rate": 8.965185785948154e-05, | |
| "epoch": 0.55, | |
| "step": 36750 | |
| }, | |
| { | |
| "loss": 1.2188, | |
| "grad_norm": 1.113638997077942, | |
| "learning_rate": 8.957676248836022e-05, | |
| "epoch": 0.55, | |
| "step": 36775 | |
| }, | |
| { | |
| "loss": 1.1625, | |
| "grad_norm": 2.105451822280884, | |
| "learning_rate": 8.95016671172389e-05, | |
| "epoch": 0.55, | |
| "step": 36800 | |
| }, | |
| { | |
| "loss": 1.1613, | |
| "grad_norm": 2.07865309715271, | |
| "learning_rate": 8.942657174611757e-05, | |
| "epoch": 0.55, | |
| "step": 36825 | |
| }, | |
| { | |
| "loss": 1.1396, | |
| "grad_norm": 3.033181667327881, | |
| "learning_rate": 8.935147637499625e-05, | |
| "epoch": 0.55, | |
| "step": 36850 | |
| }, | |
| { | |
| "loss": 1.1236, | |
| "grad_norm": 1.4927865266799927, | |
| "learning_rate": 8.927638100387492e-05, | |
| "epoch": 0.55, | |
| "step": 36875 | |
| }, | |
| { | |
| "loss": 1.218, | |
| "grad_norm": 1.5946248769760132, | |
| "learning_rate": 8.92012856327536e-05, | |
| "epoch": 0.55, | |
| "step": 36900 | |
| }, | |
| { | |
| "loss": 1.1734, | |
| "grad_norm": 2.349677562713623, | |
| "learning_rate": 8.912619026163228e-05, | |
| "epoch": 0.55, | |
| "step": 36925 | |
| }, | |
| { | |
| "loss": 1.1605, | |
| "grad_norm": 2.3983821868896484, | |
| "learning_rate": 8.905109489051096e-05, | |
| "epoch": 0.55, | |
| "step": 36950 | |
| }, | |
| { | |
| "loss": 1.1976, | |
| "grad_norm": 2.228635787963867, | |
| "learning_rate": 8.897599951938963e-05, | |
| "epoch": 0.56, | |
| "step": 36975 | |
| }, | |
| { | |
| "loss": 1.1316, | |
| "grad_norm": 1.0619374513626099, | |
| "learning_rate": 8.89009041482683e-05, | |
| "epoch": 0.56, | |
| "step": 37000 | |
| }, | |
| { | |
| "loss": 1.0945, | |
| "grad_norm": 2.490835428237915, | |
| "learning_rate": 8.882580877714698e-05, | |
| "epoch": 0.56, | |
| "step": 37025 | |
| }, | |
| { | |
| "loss": 1.1367, | |
| "grad_norm": 1.3563780784606934, | |
| "learning_rate": 8.875071340602565e-05, | |
| "epoch": 0.56, | |
| "step": 37050 | |
| }, | |
| { | |
| "loss": 1.1431, | |
| "grad_norm": 2.2259418964385986, | |
| "learning_rate": 8.867561803490433e-05, | |
| "epoch": 0.56, | |
| "step": 37075 | |
| }, | |
| { | |
| "loss": 1.1641, | |
| "grad_norm": 1.6267977952957153, | |
| "learning_rate": 8.860052266378301e-05, | |
| "epoch": 0.56, | |
| "step": 37100 | |
| }, | |
| { | |
| "loss": 1.1515, | |
| "grad_norm": 1.1950840950012207, | |
| "learning_rate": 8.852542729266169e-05, | |
| "epoch": 0.56, | |
| "step": 37125 | |
| }, | |
| { | |
| "loss": 1.1531, | |
| "grad_norm": 2.104607343673706, | |
| "learning_rate": 8.845033192154036e-05, | |
| "epoch": 0.56, | |
| "step": 37150 | |
| }, | |
| { | |
| "loss": 1.1484, | |
| "grad_norm": 1.70024573802948, | |
| "learning_rate": 8.837523655041904e-05, | |
| "epoch": 0.56, | |
| "step": 37175 | |
| }, | |
| { | |
| "loss": 1.1501, | |
| "grad_norm": 1.2371578216552734, | |
| "learning_rate": 8.830014117929771e-05, | |
| "epoch": 0.56, | |
| "step": 37200 | |
| }, | |
| { | |
| "loss": 1.186, | |
| "grad_norm": 0.9835503101348877, | |
| "learning_rate": 8.822504580817639e-05, | |
| "epoch": 0.56, | |
| "step": 37225 | |
| }, | |
| { | |
| "loss": 1.2074, | |
| "grad_norm": 1.633745551109314, | |
| "learning_rate": 8.814995043705507e-05, | |
| "epoch": 0.56, | |
| "step": 37250 | |
| }, | |
| { | |
| "loss": 1.0625, | |
| "grad_norm": 0.9619184732437134, | |
| "learning_rate": 8.807485506593373e-05, | |
| "epoch": 0.56, | |
| "step": 37275 | |
| }, | |
| { | |
| "loss": 1.1741, | |
| "grad_norm": 2.740689516067505, | |
| "learning_rate": 8.799975969481241e-05, | |
| "epoch": 0.56, | |
| "step": 37300 | |
| }, | |
| { | |
| "loss": 1.1879, | |
| "grad_norm": 1.7593574523925781, | |
| "learning_rate": 8.79246643236911e-05, | |
| "epoch": 0.56, | |
| "step": 37325 | |
| }, | |
| { | |
| "loss": 1.1428, | |
| "grad_norm": 1.7317709922790527, | |
| "learning_rate": 8.784956895256977e-05, | |
| "epoch": 0.56, | |
| "step": 37350 | |
| }, | |
| { | |
| "loss": 1.178, | |
| "grad_norm": 2.689879894256592, | |
| "learning_rate": 8.777447358144845e-05, | |
| "epoch": 0.56, | |
| "step": 37375 | |
| }, | |
| { | |
| "loss": 1.1386, | |
| "grad_norm": 1.3778091669082642, | |
| "learning_rate": 8.769937821032713e-05, | |
| "epoch": 0.56, | |
| "step": 37400 | |
| }, | |
| { | |
| "loss": 1.1171, | |
| "grad_norm": 1.4201562404632568, | |
| "learning_rate": 8.762428283920579e-05, | |
| "epoch": 0.56, | |
| "step": 37425 | |
| }, | |
| { | |
| "loss": 1.1575, | |
| "grad_norm": 1.2890523672103882, | |
| "learning_rate": 8.754918746808447e-05, | |
| "epoch": 0.56, | |
| "step": 37450 | |
| }, | |
| { | |
| "loss": 1.1488, | |
| "grad_norm": 1.5881069898605347, | |
| "learning_rate": 8.747409209696315e-05, | |
| "epoch": 0.56, | |
| "step": 37475 | |
| }, | |
| { | |
| "loss": 1.1466, | |
| "grad_norm": 2.2226951122283936, | |
| "learning_rate": 8.739899672584181e-05, | |
| "epoch": 0.56, | |
| "step": 37500 | |
| }, | |
| { | |
| "loss": 1.2388, | |
| "grad_norm": 1.865116000175476, | |
| "learning_rate": 8.73239013547205e-05, | |
| "epoch": 0.56, | |
| "step": 37525 | |
| }, | |
| { | |
| "loss": 1.1089, | |
| "grad_norm": 1.7630786895751953, | |
| "learning_rate": 8.724880598359919e-05, | |
| "epoch": 0.56, | |
| "step": 37550 | |
| }, | |
| { | |
| "loss": 1.1414, | |
| "grad_norm": 1.465029001235962, | |
| "learning_rate": 8.717371061247785e-05, | |
| "epoch": 0.56, | |
| "step": 37575 | |
| }, | |
| { | |
| "loss": 1.1533, | |
| "grad_norm": 1.2078875303268433, | |
| "learning_rate": 8.709861524135653e-05, | |
| "epoch": 0.56, | |
| "step": 37600 | |
| }, | |
| { | |
| "loss": 1.1931, | |
| "grad_norm": 1.6650409698486328, | |
| "learning_rate": 8.702351987023521e-05, | |
| "epoch": 0.57, | |
| "step": 37625 | |
| }, | |
| { | |
| "loss": 1.2402, | |
| "grad_norm": 2.3124303817749023, | |
| "learning_rate": 8.694842449911387e-05, | |
| "epoch": 0.57, | |
| "step": 37650 | |
| }, | |
| { | |
| "loss": 1.1566, | |
| "grad_norm": 1.6543500423431396, | |
| "learning_rate": 8.687332912799255e-05, | |
| "epoch": 0.57, | |
| "step": 37675 | |
| }, | |
| { | |
| "loss": 1.1466, | |
| "grad_norm": 1.0098353624343872, | |
| "learning_rate": 8.680123757171608e-05, | |
| "epoch": 0.57, | |
| "step": 37700 | |
| }, | |
| { | |
| "loss": 1.1809, | |
| "grad_norm": 4.323051929473877, | |
| "learning_rate": 8.672614220059476e-05, | |
| "epoch": 0.57, | |
| "step": 37725 | |
| }, | |
| { | |
| "loss": 1.1547, | |
| "grad_norm": 1.610032081604004, | |
| "learning_rate": 8.665104682947344e-05, | |
| "epoch": 0.57, | |
| "step": 37750 | |
| }, | |
| { | |
| "loss": 1.1416, | |
| "grad_norm": 2.1819934844970703, | |
| "learning_rate": 8.65759514583521e-05, | |
| "epoch": 0.57, | |
| "step": 37775 | |
| }, | |
| { | |
| "loss": 1.1733, | |
| "grad_norm": 1.3869298696517944, | |
| "learning_rate": 8.650085608723078e-05, | |
| "epoch": 0.57, | |
| "step": 37800 | |
| }, | |
| { | |
| "loss": 1.1933, | |
| "grad_norm": 4.251647472381592, | |
| "learning_rate": 8.642576071610946e-05, | |
| "epoch": 0.57, | |
| "step": 37825 | |
| }, | |
| { | |
| "loss": 1.1496, | |
| "grad_norm": 1.59364914894104, | |
| "learning_rate": 8.635066534498814e-05, | |
| "epoch": 0.57, | |
| "step": 37850 | |
| }, | |
| { | |
| "loss": 1.1961, | |
| "grad_norm": 1.4821110963821411, | |
| "learning_rate": 8.62755699738668e-05, | |
| "epoch": 0.57, | |
| "step": 37875 | |
| }, | |
| { | |
| "loss": 1.1613, | |
| "grad_norm": 1.1281379461288452, | |
| "learning_rate": 8.62004746027455e-05, | |
| "epoch": 0.57, | |
| "step": 37900 | |
| }, | |
| { | |
| "loss": 1.1104, | |
| "grad_norm": 1.1649848222732544, | |
| "learning_rate": 8.612537923162416e-05, | |
| "epoch": 0.57, | |
| "step": 37925 | |
| }, | |
| { | |
| "loss": 1.1502, | |
| "grad_norm": 1.2780572175979614, | |
| "learning_rate": 8.605028386050284e-05, | |
| "epoch": 0.57, | |
| "step": 37950 | |
| }, | |
| { | |
| "loss": 1.1132, | |
| "grad_norm": 0.8712659478187561, | |
| "learning_rate": 8.597518848938152e-05, | |
| "epoch": 0.57, | |
| "step": 37975 | |
| }, | |
| { | |
| "loss": 1.1839, | |
| "grad_norm": 1.912044882774353, | |
| "learning_rate": 8.59000931182602e-05, | |
| "epoch": 0.57, | |
| "step": 38000 | |
| }, | |
| { | |
| "loss": 1.2002, | |
| "grad_norm": 1.3458526134490967, | |
| "learning_rate": 8.582499774713887e-05, | |
| "epoch": 0.57, | |
| "step": 38025 | |
| }, | |
| { | |
| "loss": 1.0894, | |
| "grad_norm": 0.9828363060951233, | |
| "learning_rate": 8.574990237601755e-05, | |
| "epoch": 0.57, | |
| "step": 38050 | |
| }, | |
| { | |
| "loss": 1.1063, | |
| "grad_norm": 1.625246286392212, | |
| "learning_rate": 8.567480700489622e-05, | |
| "epoch": 0.57, | |
| "step": 38075 | |
| }, | |
| { | |
| "loss": 1.1812, | |
| "grad_norm": 2.1737546920776367, | |
| "learning_rate": 8.559971163377489e-05, | |
| "epoch": 0.57, | |
| "step": 38100 | |
| }, | |
| { | |
| "loss": 1.2085, | |
| "grad_norm": 2.304011583328247, | |
| "learning_rate": 8.552461626265358e-05, | |
| "epoch": 0.57, | |
| "step": 38125 | |
| }, | |
| { | |
| "loss": 1.2123, | |
| "grad_norm": 2.7804811000823975, | |
| "learning_rate": 8.544952089153225e-05, | |
| "epoch": 0.57, | |
| "step": 38150 | |
| }, | |
| { | |
| "loss": 1.0834, | |
| "grad_norm": 1.5996559858322144, | |
| "learning_rate": 8.537442552041093e-05, | |
| "epoch": 0.57, | |
| "step": 38175 | |
| }, | |
| { | |
| "loss": 1.1438, | |
| "grad_norm": 2.293555736541748, | |
| "learning_rate": 8.52993301492896e-05, | |
| "epoch": 0.57, | |
| "step": 38200 | |
| }, | |
| { | |
| "loss": 1.1268, | |
| "grad_norm": 3.2344138622283936, | |
| "learning_rate": 8.522423477816828e-05, | |
| "epoch": 0.57, | |
| "step": 38225 | |
| }, | |
| { | |
| "loss": 1.1241, | |
| "grad_norm": 1.3770357370376587, | |
| "learning_rate": 8.514913940704695e-05, | |
| "epoch": 0.57, | |
| "step": 38250 | |
| }, | |
| { | |
| "loss": 1.1562, | |
| "grad_norm": 1.8360143899917603, | |
| "learning_rate": 8.507404403592563e-05, | |
| "epoch": 0.57, | |
| "step": 38275 | |
| }, | |
| { | |
| "loss": 1.0981, | |
| "grad_norm": 1.4072625637054443, | |
| "learning_rate": 8.49989486648043e-05, | |
| "epoch": 0.58, | |
| "step": 38300 | |
| }, | |
| { | |
| "loss": 1.1859, | |
| "grad_norm": 1.3473397493362427, | |
| "learning_rate": 8.492385329368299e-05, | |
| "epoch": 0.58, | |
| "step": 38325 | |
| }, | |
| { | |
| "loss": 1.1482, | |
| "grad_norm": 1.2026944160461426, | |
| "learning_rate": 8.484875792256166e-05, | |
| "epoch": 0.58, | |
| "step": 38350 | |
| }, | |
| { | |
| "loss": 1.1669, | |
| "grad_norm": 1.541365146636963, | |
| "learning_rate": 8.477366255144033e-05, | |
| "epoch": 0.58, | |
| "step": 38375 | |
| }, | |
| { | |
| "loss": 1.1388, | |
| "grad_norm": 1.4856247901916504, | |
| "learning_rate": 8.469856718031901e-05, | |
| "epoch": 0.58, | |
| "step": 38400 | |
| }, | |
| { | |
| "loss": 1.1062, | |
| "grad_norm": 1.3066248893737793, | |
| "learning_rate": 8.462347180919769e-05, | |
| "epoch": 0.58, | |
| "step": 38425 | |
| }, | |
| { | |
| "loss": 1.2007, | |
| "grad_norm": 1.4467949867248535, | |
| "learning_rate": 8.454837643807637e-05, | |
| "epoch": 0.58, | |
| "step": 38450 | |
| }, | |
| { | |
| "loss": 1.1274, | |
| "grad_norm": 1.0385124683380127, | |
| "learning_rate": 8.447328106695503e-05, | |
| "epoch": 0.58, | |
| "step": 38475 | |
| }, | |
| { | |
| "loss": 1.1398, | |
| "grad_norm": 1.994707465171814, | |
| "learning_rate": 8.439818569583371e-05, | |
| "epoch": 0.58, | |
| "step": 38500 | |
| }, | |
| { | |
| "loss": 1.1434, | |
| "grad_norm": 1.9492372274398804, | |
| "learning_rate": 8.432309032471239e-05, | |
| "epoch": 0.58, | |
| "step": 38525 | |
| }, | |
| { | |
| "loss": 1.1526, | |
| "grad_norm": 1.5071452856063843, | |
| "learning_rate": 8.424799495359107e-05, | |
| "epoch": 0.58, | |
| "step": 38550 | |
| }, | |
| { | |
| "loss": 1.2361, | |
| "grad_norm": 1.9257084131240845, | |
| "learning_rate": 8.417289958246975e-05, | |
| "epoch": 0.58, | |
| "step": 38575 | |
| }, | |
| { | |
| "loss": 1.1397, | |
| "grad_norm": 1.549302101135254, | |
| "learning_rate": 8.409780421134841e-05, | |
| "epoch": 0.58, | |
| "step": 38600 | |
| }, | |
| { | |
| "loss": 1.0881, | |
| "grad_norm": 1.1353474855422974, | |
| "learning_rate": 8.402270884022709e-05, | |
| "epoch": 0.58, | |
| "step": 38625 | |
| }, | |
| { | |
| "loss": 1.1171, | |
| "grad_norm": 1.092421293258667, | |
| "learning_rate": 8.394761346910577e-05, | |
| "epoch": 0.58, | |
| "step": 38650 | |
| }, | |
| { | |
| "loss": 1.1855, | |
| "grad_norm": 2.2004194259643555, | |
| "learning_rate": 8.387251809798445e-05, | |
| "epoch": 0.58, | |
| "step": 38675 | |
| }, | |
| { | |
| "loss": 1.1005, | |
| "grad_norm": 1.2563297748565674, | |
| "learning_rate": 8.379742272686311e-05, | |
| "epoch": 0.58, | |
| "step": 38700 | |
| }, | |
| { | |
| "loss": 1.1269, | |
| "grad_norm": 1.0468568801879883, | |
| "learning_rate": 8.37223273557418e-05, | |
| "epoch": 0.58, | |
| "step": 38725 | |
| }, | |
| { | |
| "loss": 1.1281, | |
| "grad_norm": 1.2064344882965088, | |
| "learning_rate": 8.364723198462047e-05, | |
| "epoch": 0.58, | |
| "step": 38750 | |
| }, | |
| { | |
| "loss": 1.1261, | |
| "grad_norm": 1.8252434730529785, | |
| "learning_rate": 8.357213661349915e-05, | |
| "epoch": 0.58, | |
| "step": 38775 | |
| }, | |
| { | |
| "loss": 1.1461, | |
| "grad_norm": 2.017496109008789, | |
| "learning_rate": 8.349704124237783e-05, | |
| "epoch": 0.58, | |
| "step": 38800 | |
| }, | |
| { | |
| "loss": 1.1649, | |
| "grad_norm": 2.0913796424865723, | |
| "learning_rate": 8.342494968610134e-05, | |
| "epoch": 0.58, | |
| "step": 38825 | |
| }, | |
| { | |
| "loss": 1.109, | |
| "grad_norm": 1.446608304977417, | |
| "learning_rate": 8.334985431498002e-05, | |
| "epoch": 0.58, | |
| "step": 38850 | |
| }, | |
| { | |
| "loss": 1.1402, | |
| "grad_norm": 1.3379923105239868, | |
| "learning_rate": 8.32747589438587e-05, | |
| "epoch": 0.58, | |
| "step": 38875 | |
| }, | |
| { | |
| "loss": 1.151, | |
| "grad_norm": 1.2311291694641113, | |
| "learning_rate": 8.319966357273738e-05, | |
| "epoch": 0.58, | |
| "step": 38900 | |
| }, | |
| { | |
| "loss": 1.0964, | |
| "grad_norm": 1.5600236654281616, | |
| "learning_rate": 8.312456820161606e-05, | |
| "epoch": 0.58, | |
| "step": 38925 | |
| }, | |
| { | |
| "loss": 1.2035, | |
| "grad_norm": 1.953704833984375, | |
| "learning_rate": 8.304947283049474e-05, | |
| "epoch": 0.58, | |
| "step": 38950 | |
| }, | |
| { | |
| "loss": 1.1308, | |
| "grad_norm": 2.2543299198150635, | |
| "learning_rate": 8.29743774593734e-05, | |
| "epoch": 0.59, | |
| "step": 38975 | |
| }, | |
| { | |
| "loss": 1.1265, | |
| "grad_norm": 1.1865688562393188, | |
| "learning_rate": 8.289928208825208e-05, | |
| "epoch": 0.59, | |
| "step": 39000 | |
| }, | |
| { | |
| "loss": 1.1292, | |
| "grad_norm": 5.55220890045166, | |
| "learning_rate": 8.282418671713076e-05, | |
| "epoch": 0.59, | |
| "step": 39025 | |
| }, | |
| { | |
| "loss": 1.1661, | |
| "grad_norm": 4.146172523498535, | |
| "learning_rate": 8.274909134600943e-05, | |
| "epoch": 0.59, | |
| "step": 39050 | |
| }, | |
| { | |
| "loss": 1.0739, | |
| "grad_norm": 1.3395634889602661, | |
| "learning_rate": 8.26739959748881e-05, | |
| "epoch": 0.59, | |
| "step": 39075 | |
| }, | |
| { | |
| "loss": 1.1592, | |
| "grad_norm": 1.3389477729797363, | |
| "learning_rate": 8.259890060376679e-05, | |
| "epoch": 0.59, | |
| "step": 39100 | |
| }, | |
| { | |
| "loss": 1.2078, | |
| "grad_norm": 1.6884431838989258, | |
| "learning_rate": 8.252380523264546e-05, | |
| "epoch": 0.59, | |
| "step": 39125 | |
| }, | |
| { | |
| "loss": 1.1245, | |
| "grad_norm": 2.1312379837036133, | |
| "learning_rate": 8.244870986152414e-05, | |
| "epoch": 0.59, | |
| "step": 39150 | |
| }, | |
| { | |
| "loss": 1.1295, | |
| "grad_norm": 1.0998674631118774, | |
| "learning_rate": 8.237361449040282e-05, | |
| "epoch": 0.59, | |
| "step": 39175 | |
| }, | |
| { | |
| "loss": 1.0666, | |
| "grad_norm": 1.3891421556472778, | |
| "learning_rate": 8.229851911928149e-05, | |
| "epoch": 0.59, | |
| "step": 39200 | |
| }, | |
| { | |
| "loss": 1.2052, | |
| "grad_norm": 1.069043755531311, | |
| "learning_rate": 8.222342374816017e-05, | |
| "epoch": 0.59, | |
| "step": 39225 | |
| }, | |
| { | |
| "loss": 1.1251, | |
| "grad_norm": 2.247673273086548, | |
| "learning_rate": 8.214832837703884e-05, | |
| "epoch": 0.59, | |
| "step": 39250 | |
| }, | |
| { | |
| "loss": 1.124, | |
| "grad_norm": 1.6093597412109375, | |
| "learning_rate": 8.207323300591751e-05, | |
| "epoch": 0.59, | |
| "step": 39275 | |
| }, | |
| { | |
| "loss": 1.0996, | |
| "grad_norm": 2.3855221271514893, | |
| "learning_rate": 8.199813763479619e-05, | |
| "epoch": 0.59, | |
| "step": 39300 | |
| }, | |
| { | |
| "loss": 1.1916, | |
| "grad_norm": 2.479825258255005, | |
| "learning_rate": 8.192304226367488e-05, | |
| "epoch": 0.59, | |
| "step": 39325 | |
| }, | |
| { | |
| "loss": 1.1752, | |
| "grad_norm": 1.302335262298584, | |
| "learning_rate": 8.184794689255355e-05, | |
| "epoch": 0.59, | |
| "step": 39350 | |
| }, | |
| { | |
| "loss": 1.0752, | |
| "grad_norm": 1.7336974143981934, | |
| "learning_rate": 8.177285152143223e-05, | |
| "epoch": 0.59, | |
| "step": 39375 | |
| }, | |
| { | |
| "loss": 1.1491, | |
| "grad_norm": 1.7334376573562622, | |
| "learning_rate": 8.16977561503109e-05, | |
| "epoch": 0.59, | |
| "step": 39400 | |
| }, | |
| { | |
| "loss": 1.1288, | |
| "grad_norm": 1.5370564460754395, | |
| "learning_rate": 8.162266077918957e-05, | |
| "epoch": 0.59, | |
| "step": 39425 | |
| }, | |
| { | |
| "loss": 1.1544, | |
| "grad_norm": 2.190082550048828, | |
| "learning_rate": 8.154756540806825e-05, | |
| "epoch": 0.59, | |
| "step": 39450 | |
| }, | |
| { | |
| "loss": 1.1265, | |
| "grad_norm": 1.2925649881362915, | |
| "learning_rate": 8.147247003694693e-05, | |
| "epoch": 0.59, | |
| "step": 39475 | |
| }, | |
| { | |
| "loss": 1.1175, | |
| "grad_norm": 1.5995088815689087, | |
| "learning_rate": 8.139737466582559e-05, | |
| "epoch": 0.59, | |
| "step": 39500 | |
| }, | |
| { | |
| "loss": 1.1401, | |
| "grad_norm": 2.018927812576294, | |
| "learning_rate": 8.132227929470427e-05, | |
| "epoch": 0.59, | |
| "step": 39525 | |
| }, | |
| { | |
| "loss": 1.145, | |
| "grad_norm": 2.101435661315918, | |
| "learning_rate": 8.124718392358296e-05, | |
| "epoch": 0.59, | |
| "step": 39550 | |
| }, | |
| { | |
| "loss": 1.2138, | |
| "grad_norm": 1.0594968795776367, | |
| "learning_rate": 8.117208855246163e-05, | |
| "epoch": 0.59, | |
| "step": 39575 | |
| }, | |
| { | |
| "loss": 1.1428, | |
| "grad_norm": 1.3600685596466064, | |
| "learning_rate": 8.109699318134031e-05, | |
| "epoch": 0.59, | |
| "step": 39600 | |
| }, | |
| { | |
| "loss": 1.1717, | |
| "grad_norm": 1.4228684902191162, | |
| "learning_rate": 8.102189781021899e-05, | |
| "epoch": 0.6, | |
| "step": 39625 | |
| }, | |
| { | |
| "loss": 1.1585, | |
| "grad_norm": 1.1526142358779907, | |
| "learning_rate": 8.094680243909765e-05, | |
| "epoch": 0.6, | |
| "step": 39650 | |
| }, | |
| { | |
| "loss": 1.182, | |
| "grad_norm": 1.6106703281402588, | |
| "learning_rate": 8.087170706797633e-05, | |
| "epoch": 0.6, | |
| "step": 39675 | |
| }, | |
| { | |
| "loss": 1.0861, | |
| "grad_norm": 1.4839822053909302, | |
| "learning_rate": 8.079661169685501e-05, | |
| "epoch": 0.6, | |
| "step": 39700 | |
| }, | |
| { | |
| "loss": 1.1069, | |
| "grad_norm": 1.2879067659378052, | |
| "learning_rate": 8.072151632573368e-05, | |
| "epoch": 0.6, | |
| "step": 39725 | |
| }, | |
| { | |
| "loss": 1.0987, | |
| "grad_norm": 1.0558810234069824, | |
| "learning_rate": 8.064642095461235e-05, | |
| "epoch": 0.6, | |
| "step": 39750 | |
| }, | |
| { | |
| "loss": 1.1328, | |
| "grad_norm": 1.1778608560562134, | |
| "learning_rate": 8.057132558349105e-05, | |
| "epoch": 0.6, | |
| "step": 39775 | |
| }, | |
| { | |
| "loss": 1.1309, | |
| "grad_norm": 6.927417755126953, | |
| "learning_rate": 8.049623021236971e-05, | |
| "epoch": 0.6, | |
| "step": 39800 | |
| }, | |
| { | |
| "loss": 1.0943, | |
| "grad_norm": 2.03812837600708, | |
| "learning_rate": 8.042113484124839e-05, | |
| "epoch": 0.6, | |
| "step": 39825 | |
| }, | |
| { | |
| "loss": 1.1745, | |
| "grad_norm": 1.8612788915634155, | |
| "learning_rate": 8.034603947012707e-05, | |
| "epoch": 0.6, | |
| "step": 39850 | |
| }, | |
| { | |
| "loss": 1.1406, | |
| "grad_norm": 1.7891243696212769, | |
| "learning_rate": 8.027094409900574e-05, | |
| "epoch": 0.6, | |
| "step": 39875 | |
| }, | |
| { | |
| "loss": 1.1637, | |
| "grad_norm": 1.3130542039871216, | |
| "learning_rate": 8.019584872788441e-05, | |
| "epoch": 0.6, | |
| "step": 39900 | |
| }, | |
| { | |
| "loss": 1.161, | |
| "grad_norm": 1.4459270238876343, | |
| "learning_rate": 8.012075335676309e-05, | |
| "epoch": 0.6, | |
| "step": 39925 | |
| }, | |
| { | |
| "loss": 1.0771, | |
| "grad_norm": 1.576799988746643, | |
| "learning_rate": 8.004565798564176e-05, | |
| "epoch": 0.6, | |
| "step": 39950 | |
| }, | |
| { | |
| "loss": 1.1488, | |
| "grad_norm": 1.5947468280792236, | |
| "learning_rate": 7.997056261452045e-05, | |
| "epoch": 0.6, | |
| "step": 39975 | |
| }, | |
| { | |
| "loss": 1.082, | |
| "grad_norm": 1.6659477949142456, | |
| "learning_rate": 7.989546724339913e-05, | |
| "epoch": 0.6, | |
| "step": 40000 | |
| }, | |
| { | |
| "loss": 1.1407, | |
| "grad_norm": 5.243545055389404, | |
| "learning_rate": 7.98203718722778e-05, | |
| "epoch": 0.6, | |
| "step": 40025 | |
| }, | |
| { | |
| "loss": 1.0316, | |
| "grad_norm": 1.1737557649612427, | |
| "learning_rate": 7.974527650115647e-05, | |
| "epoch": 0.6, | |
| "step": 40050 | |
| }, | |
| { | |
| "loss": 1.1271, | |
| "grad_norm": 1.7314780950546265, | |
| "learning_rate": 7.967018113003515e-05, | |
| "epoch": 0.6, | |
| "step": 40075 | |
| }, | |
| { | |
| "loss": 1.1789, | |
| "grad_norm": 2.0266127586364746, | |
| "learning_rate": 7.959508575891382e-05, | |
| "epoch": 0.6, | |
| "step": 40100 | |
| }, | |
| { | |
| "loss": 1.1694, | |
| "grad_norm": 1.2344083786010742, | |
| "learning_rate": 7.95199903877925e-05, | |
| "epoch": 0.6, | |
| "step": 40125 | |
| }, | |
| { | |
| "loss": 1.2533, | |
| "grad_norm": 1.9182096719741821, | |
| "learning_rate": 7.944489501667118e-05, | |
| "epoch": 0.6, | |
| "step": 40150 | |
| }, | |
| { | |
| "loss": 1.1746, | |
| "grad_norm": 1.2988171577453613, | |
| "learning_rate": 7.936979964554984e-05, | |
| "epoch": 0.6, | |
| "step": 40175 | |
| }, | |
| { | |
| "loss": 1.1278, | |
| "grad_norm": 1.847265601158142, | |
| "learning_rate": 7.929470427442853e-05, | |
| "epoch": 0.6, | |
| "step": 40200 | |
| }, | |
| { | |
| "loss": 1.1505, | |
| "grad_norm": 1.2763010263442993, | |
| "learning_rate": 7.921960890330721e-05, | |
| "epoch": 0.6, | |
| "step": 40225 | |
| }, | |
| { | |
| "loss": 1.1054, | |
| "grad_norm": 1.8603181838989258, | |
| "learning_rate": 7.914451353218588e-05, | |
| "epoch": 0.6, | |
| "step": 40250 | |
| }, | |
| { | |
| "loss": 1.141, | |
| "grad_norm": 1.320823073387146, | |
| "learning_rate": 7.906941816106456e-05, | |
| "epoch": 0.6, | |
| "step": 40275 | |
| }, | |
| { | |
| "loss": 1.1459, | |
| "grad_norm": 1.6374232769012451, | |
| "learning_rate": 7.899432278994324e-05, | |
| "epoch": 0.61, | |
| "step": 40300 | |
| }, | |
| { | |
| "loss": 1.1029, | |
| "grad_norm": 2.58848237991333, | |
| "learning_rate": 7.89192274188219e-05, | |
| "epoch": 0.61, | |
| "step": 40325 | |
| }, | |
| { | |
| "loss": 1.0986, | |
| "grad_norm": 2.0660908222198486, | |
| "learning_rate": 7.884413204770058e-05, | |
| "epoch": 0.61, | |
| "step": 40350 | |
| }, | |
| { | |
| "loss": 1.1133, | |
| "grad_norm": 1.2920788526535034, | |
| "learning_rate": 7.876903667657926e-05, | |
| "epoch": 0.61, | |
| "step": 40375 | |
| }, | |
| { | |
| "loss": 1.1248, | |
| "grad_norm": 1.9770618677139282, | |
| "learning_rate": 7.869394130545792e-05, | |
| "epoch": 0.61, | |
| "step": 40400 | |
| }, | |
| { | |
| "loss": 1.1426, | |
| "grad_norm": 1.2287840843200684, | |
| "learning_rate": 7.861884593433662e-05, | |
| "epoch": 0.61, | |
| "step": 40425 | |
| }, | |
| { | |
| "loss": 1.13, | |
| "grad_norm": 1.4473248720169067, | |
| "learning_rate": 7.85437505632153e-05, | |
| "epoch": 0.61, | |
| "step": 40450 | |
| }, | |
| { | |
| "loss": 1.1542, | |
| "grad_norm": 1.8491151332855225, | |
| "learning_rate": 7.846865519209396e-05, | |
| "epoch": 0.61, | |
| "step": 40475 | |
| }, | |
| { | |
| "loss": 1.1202, | |
| "grad_norm": 1.6479681730270386, | |
| "learning_rate": 7.839355982097264e-05, | |
| "epoch": 0.61, | |
| "step": 40500 | |
| }, | |
| { | |
| "loss": 1.1155, | |
| "grad_norm": 1.355073094367981, | |
| "learning_rate": 7.831846444985132e-05, | |
| "epoch": 0.61, | |
| "step": 40525 | |
| }, | |
| { | |
| "loss": 1.0916, | |
| "grad_norm": 2.459308385848999, | |
| "learning_rate": 7.824336907872998e-05, | |
| "epoch": 0.61, | |
| "step": 40550 | |
| }, | |
| { | |
| "loss": 1.0323, | |
| "grad_norm": 2.234042167663574, | |
| "learning_rate": 7.816827370760866e-05, | |
| "epoch": 0.61, | |
| "step": 40575 | |
| }, | |
| { | |
| "loss": 1.082, | |
| "grad_norm": 1.3024649620056152, | |
| "learning_rate": 7.809317833648734e-05, | |
| "epoch": 0.61, | |
| "step": 40600 | |
| }, | |
| { | |
| "loss": 1.1336, | |
| "grad_norm": 1.6423088312149048, | |
| "learning_rate": 7.801808296536602e-05, | |
| "epoch": 0.61, | |
| "step": 40625 | |
| }, | |
| { | |
| "loss": 1.1655, | |
| "grad_norm": 1.7982234954833984, | |
| "learning_rate": 7.79429875942447e-05, | |
| "epoch": 0.61, | |
| "step": 40650 | |
| }, | |
| { | |
| "loss": 1.1202, | |
| "grad_norm": 1.7683314085006714, | |
| "learning_rate": 7.786789222312338e-05, | |
| "epoch": 0.61, | |
| "step": 40675 | |
| }, | |
| { | |
| "loss": 1.148, | |
| "grad_norm": 1.1973870992660522, | |
| "learning_rate": 7.779279685200204e-05, | |
| "epoch": 0.61, | |
| "step": 40700 | |
| }, | |
| { | |
| "loss": 1.1323, | |
| "grad_norm": 1.366228461265564, | |
| "learning_rate": 7.771770148088072e-05, | |
| "epoch": 0.61, | |
| "step": 40725 | |
| }, | |
| { | |
| "loss": 1.1777, | |
| "grad_norm": 1.8791155815124512, | |
| "learning_rate": 7.76426061097594e-05, | |
| "epoch": 0.61, | |
| "step": 40750 | |
| }, | |
| { | |
| "loss": 1.2012, | |
| "grad_norm": 1.5715335607528687, | |
| "learning_rate": 7.756751073863807e-05, | |
| "epoch": 0.61, | |
| "step": 40775 | |
| }, | |
| { | |
| "loss": 1.1607, | |
| "grad_norm": 2.0917367935180664, | |
| "learning_rate": 7.749241536751674e-05, | |
| "epoch": 0.61, | |
| "step": 40800 | |
| }, | |
| { | |
| "loss": 1.1775, | |
| "grad_norm": 2.049710988998413, | |
| "learning_rate": 7.741731999639542e-05, | |
| "epoch": 0.61, | |
| "step": 40825 | |
| }, | |
| { | |
| "loss": 1.171, | |
| "grad_norm": 1.9413490295410156, | |
| "learning_rate": 7.73422246252741e-05, | |
| "epoch": 0.61, | |
| "step": 40850 | |
| }, | |
| { | |
| "loss": 1.147, | |
| "grad_norm": 1.760611891746521, | |
| "learning_rate": 7.726712925415278e-05, | |
| "epoch": 0.61, | |
| "step": 40875 | |
| }, | |
| { | |
| "loss": 1.0593, | |
| "grad_norm": 1.515251874923706, | |
| "learning_rate": 7.719203388303146e-05, | |
| "epoch": 0.61, | |
| "step": 40900 | |
| }, | |
| { | |
| "loss": 1.1917, | |
| "grad_norm": 1.9760046005249023, | |
| "learning_rate": 7.711693851191013e-05, | |
| "epoch": 0.61, | |
| "step": 40925 | |
| }, | |
| { | |
| "loss": 1.1901, | |
| "grad_norm": 2.0077996253967285, | |
| "learning_rate": 7.70418431407888e-05, | |
| "epoch": 0.62, | |
| "step": 40950 | |
| }, | |
| { | |
| "loss": 1.1352, | |
| "grad_norm": 1.6817588806152344, | |
| "learning_rate": 7.696674776966748e-05, | |
| "epoch": 0.62, | |
| "step": 40975 | |
| }, | |
| { | |
| "loss": 1.1396, | |
| "grad_norm": 1.1577296257019043, | |
| "learning_rate": 7.689165239854615e-05, | |
| "epoch": 0.62, | |
| "step": 41000 | |
| }, | |
| { | |
| "loss": 1.1017, | |
| "grad_norm": 1.2814109325408936, | |
| "learning_rate": 7.681655702742483e-05, | |
| "epoch": 0.62, | |
| "step": 41025 | |
| }, | |
| { | |
| "loss": 1.1205, | |
| "grad_norm": 2.1550331115722656, | |
| "learning_rate": 7.67414616563035e-05, | |
| "epoch": 0.62, | |
| "step": 41050 | |
| }, | |
| { | |
| "loss": 1.1016, | |
| "grad_norm": 0.8784595131874084, | |
| "learning_rate": 7.666636628518219e-05, | |
| "epoch": 0.62, | |
| "step": 41075 | |
| }, | |
| { | |
| "loss": 1.2382, | |
| "grad_norm": 1.159589171409607, | |
| "learning_rate": 7.659127091406086e-05, | |
| "epoch": 0.62, | |
| "step": 41100 | |
| }, | |
| { | |
| "loss": 1.2277, | |
| "grad_norm": 2.137002468109131, | |
| "learning_rate": 7.651617554293954e-05, | |
| "epoch": 0.62, | |
| "step": 41125 | |
| }, | |
| { | |
| "loss": 1.2025, | |
| "grad_norm": 1.5546560287475586, | |
| "learning_rate": 7.644108017181821e-05, | |
| "epoch": 0.62, | |
| "step": 41150 | |
| }, | |
| { | |
| "loss": 1.1419, | |
| "grad_norm": 1.3389211893081665, | |
| "learning_rate": 7.636598480069689e-05, | |
| "epoch": 0.62, | |
| "step": 41175 | |
| }, | |
| { | |
| "loss": 1.1326, | |
| "grad_norm": 1.7950804233551025, | |
| "learning_rate": 7.629088942957557e-05, | |
| "epoch": 0.62, | |
| "step": 41200 | |
| }, | |
| { | |
| "loss": 1.1444, | |
| "grad_norm": 6.291619777679443, | |
| "learning_rate": 7.621579405845423e-05, | |
| "epoch": 0.62, | |
| "step": 41225 | |
| }, | |
| { | |
| "loss": 1.1504, | |
| "grad_norm": 1.4547916650772095, | |
| "learning_rate": 7.614069868733291e-05, | |
| "epoch": 0.62, | |
| "step": 41250 | |
| }, | |
| { | |
| "loss": 1.2186, | |
| "grad_norm": 1.098617672920227, | |
| "learning_rate": 7.60656033162116e-05, | |
| "epoch": 0.62, | |
| "step": 41275 | |
| }, | |
| { | |
| "loss": 1.1272, | |
| "grad_norm": 2.3291661739349365, | |
| "learning_rate": 7.599050794509027e-05, | |
| "epoch": 0.62, | |
| "step": 41300 | |
| }, | |
| { | |
| "loss": 1.118, | |
| "grad_norm": 2.5848639011383057, | |
| "learning_rate": 7.591541257396895e-05, | |
| "epoch": 0.62, | |
| "step": 41325 | |
| }, | |
| { | |
| "loss": 1.0754, | |
| "grad_norm": 1.1806175708770752, | |
| "learning_rate": 7.584031720284763e-05, | |
| "epoch": 0.62, | |
| "step": 41350 | |
| }, | |
| { | |
| "loss": 1.1162, | |
| "grad_norm": 2.0705809593200684, | |
| "learning_rate": 7.576522183172629e-05, | |
| "epoch": 0.62, | |
| "step": 41375 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 2.1457672119140625, | |
| "learning_rate": 7.569012646060497e-05, | |
| "epoch": 0.62, | |
| "step": 41400 | |
| }, | |
| { | |
| "loss": 1.1474, | |
| "grad_norm": 2.5769121646881104, | |
| "learning_rate": 7.561503108948365e-05, | |
| "epoch": 0.62, | |
| "step": 41425 | |
| }, | |
| { | |
| "loss": 1.1491, | |
| "grad_norm": 1.2455226182937622, | |
| "learning_rate": 7.553993571836231e-05, | |
| "epoch": 0.62, | |
| "step": 41450 | |
| }, | |
| { | |
| "loss": 1.1613, | |
| "grad_norm": 1.5082957744598389, | |
| "learning_rate": 7.546484034724099e-05, | |
| "epoch": 0.62, | |
| "step": 41475 | |
| }, | |
| { | |
| "loss": 1.132, | |
| "grad_norm": 1.4825623035430908, | |
| "learning_rate": 7.538974497611969e-05, | |
| "epoch": 0.62, | |
| "step": 41500 | |
| }, | |
| { | |
| "loss": 1.132, | |
| "grad_norm": 1.490708589553833, | |
| "learning_rate": 7.531464960499835e-05, | |
| "epoch": 0.62, | |
| "step": 41525 | |
| }, | |
| { | |
| "loss": 1.1115, | |
| "grad_norm": 1.2338794469833374, | |
| "learning_rate": 7.523955423387703e-05, | |
| "epoch": 0.62, | |
| "step": 41550 | |
| }, | |
| { | |
| "loss": 1.0703, | |
| "grad_norm": 2.4002695083618164, | |
| "learning_rate": 7.516445886275571e-05, | |
| "epoch": 0.62, | |
| "step": 41575 | |
| }, | |
| { | |
| "loss": 1.1811, | |
| "grad_norm": 1.6812978982925415, | |
| "learning_rate": 7.508936349163437e-05, | |
| "epoch": 0.62, | |
| "step": 41600 | |
| }, | |
| { | |
| "loss": 1.1221, | |
| "grad_norm": 1.1364158391952515, | |
| "learning_rate": 7.501426812051305e-05, | |
| "epoch": 0.63, | |
| "step": 41625 | |
| }, | |
| { | |
| "loss": 1.1513, | |
| "grad_norm": 1.2960102558135986, | |
| "learning_rate": 7.493917274939173e-05, | |
| "epoch": 0.63, | |
| "step": 41650 | |
| }, | |
| { | |
| "loss": 1.1192, | |
| "grad_norm": 1.6246578693389893, | |
| "learning_rate": 7.48640773782704e-05, | |
| "epoch": 0.63, | |
| "step": 41675 | |
| }, | |
| { | |
| "loss": 1.1717, | |
| "grad_norm": 3.2029411792755127, | |
| "learning_rate": 7.478898200714908e-05, | |
| "epoch": 0.63, | |
| "step": 41700 | |
| }, | |
| { | |
| "loss": 1.1804, | |
| "grad_norm": 1.4046978950500488, | |
| "learning_rate": 7.471388663602777e-05, | |
| "epoch": 0.63, | |
| "step": 41725 | |
| }, | |
| { | |
| "loss": 1.075, | |
| "grad_norm": 1.478977918624878, | |
| "learning_rate": 7.463879126490643e-05, | |
| "epoch": 0.63, | |
| "step": 41750 | |
| }, | |
| { | |
| "loss": 1.1016, | |
| "grad_norm": 1.5317085981369019, | |
| "learning_rate": 7.456369589378511e-05, | |
| "epoch": 0.63, | |
| "step": 41775 | |
| }, | |
| { | |
| "loss": 1.124, | |
| "grad_norm": 1.253780484199524, | |
| "learning_rate": 7.448860052266379e-05, | |
| "epoch": 0.63, | |
| "step": 41800 | |
| }, | |
| { | |
| "loss": 1.1664, | |
| "grad_norm": 1.669776439666748, | |
| "learning_rate": 7.441350515154246e-05, | |
| "epoch": 0.63, | |
| "step": 41825 | |
| }, | |
| { | |
| "loss": 1.2216, | |
| "grad_norm": 1.3725230693817139, | |
| "learning_rate": 7.433840978042114e-05, | |
| "epoch": 0.63, | |
| "step": 41850 | |
| }, | |
| { | |
| "loss": 1.1648, | |
| "grad_norm": 1.3599203824996948, | |
| "learning_rate": 7.426331440929981e-05, | |
| "epoch": 0.63, | |
| "step": 41875 | |
| }, | |
| { | |
| "loss": 1.1791, | |
| "grad_norm": 0.8592632412910461, | |
| "learning_rate": 7.418821903817848e-05, | |
| "epoch": 0.63, | |
| "step": 41900 | |
| }, | |
| { | |
| "loss": 1.0981, | |
| "grad_norm": 2.0452094078063965, | |
| "learning_rate": 7.411312366705717e-05, | |
| "epoch": 0.63, | |
| "step": 41925 | |
| }, | |
| { | |
| "loss": 1.1431, | |
| "grad_norm": 2.367089033126831, | |
| "learning_rate": 7.403802829593585e-05, | |
| "epoch": 0.63, | |
| "step": 41950 | |
| }, | |
| { | |
| "loss": 1.1168, | |
| "grad_norm": 1.2674223184585571, | |
| "learning_rate": 7.396293292481452e-05, | |
| "epoch": 0.63, | |
| "step": 41975 | |
| }, | |
| { | |
| "loss": 1.1096, | |
| "grad_norm": 1.589810848236084, | |
| "learning_rate": 7.38878375536932e-05, | |
| "epoch": 0.63, | |
| "step": 42000 | |
| }, | |
| { | |
| "loss": 1.1267, | |
| "grad_norm": 1.9553534984588623, | |
| "learning_rate": 7.381274218257187e-05, | |
| "epoch": 0.63, | |
| "step": 42025 | |
| }, | |
| { | |
| "loss": 1.1687, | |
| "grad_norm": 1.646224021911621, | |
| "learning_rate": 7.373764681145054e-05, | |
| "epoch": 0.63, | |
| "step": 42050 | |
| }, | |
| { | |
| "loss": 1.1368, | |
| "grad_norm": 1.515450119972229, | |
| "learning_rate": 7.366255144032922e-05, | |
| "epoch": 0.63, | |
| "step": 42075 | |
| }, | |
| { | |
| "loss": 1.1289, | |
| "grad_norm": 1.4205098152160645, | |
| "learning_rate": 7.35874560692079e-05, | |
| "epoch": 0.63, | |
| "step": 42100 | |
| }, | |
| { | |
| "loss": 1.0966, | |
| "grad_norm": 1.0404484272003174, | |
| "learning_rate": 7.351236069808656e-05, | |
| "epoch": 0.63, | |
| "step": 42125 | |
| }, | |
| { | |
| "loss": 1.0713, | |
| "grad_norm": 1.615555763244629, | |
| "learning_rate": 7.343726532696525e-05, | |
| "epoch": 0.63, | |
| "step": 42150 | |
| }, | |
| { | |
| "loss": 1.1928, | |
| "grad_norm": 1.5871440172195435, | |
| "learning_rate": 7.336216995584393e-05, | |
| "epoch": 0.63, | |
| "step": 42175 | |
| }, | |
| { | |
| "loss": 1.1243, | |
| "grad_norm": 1.4905834197998047, | |
| "learning_rate": 7.32870745847226e-05, | |
| "epoch": 0.63, | |
| "step": 42200 | |
| }, | |
| { | |
| "loss": 1.1623, | |
| "grad_norm": 1.8793108463287354, | |
| "learning_rate": 7.321197921360128e-05, | |
| "epoch": 0.63, | |
| "step": 42225 | |
| }, | |
| { | |
| "loss": 1.1306, | |
| "grad_norm": 1.7370017766952515, | |
| "learning_rate": 7.313688384247996e-05, | |
| "epoch": 0.63, | |
| "step": 42250 | |
| }, | |
| { | |
| "loss": 1.1043, | |
| "grad_norm": 1.2322642803192139, | |
| "learning_rate": 7.306178847135862e-05, | |
| "epoch": 0.63, | |
| "step": 42275 | |
| }, | |
| { | |
| "loss": 1.1336, | |
| "grad_norm": 1.9522937536239624, | |
| "learning_rate": 7.29866931002373e-05, | |
| "epoch": 0.64, | |
| "step": 42300 | |
| }, | |
| { | |
| "loss": 1.2031, | |
| "grad_norm": 1.7146880626678467, | |
| "learning_rate": 7.291159772911598e-05, | |
| "epoch": 0.64, | |
| "step": 42325 | |
| }, | |
| { | |
| "loss": 1.0689, | |
| "grad_norm": 1.353827953338623, | |
| "learning_rate": 7.283650235799465e-05, | |
| "epoch": 0.64, | |
| "step": 42350 | |
| }, | |
| { | |
| "loss": 1.046, | |
| "grad_norm": 1.0300394296646118, | |
| "learning_rate": 7.276140698687334e-05, | |
| "epoch": 0.64, | |
| "step": 42375 | |
| }, | |
| { | |
| "loss": 1.1271, | |
| "grad_norm": 1.7847486734390259, | |
| "learning_rate": 7.268631161575202e-05, | |
| "epoch": 0.64, | |
| "step": 42400 | |
| }, | |
| { | |
| "loss": 1.1355, | |
| "grad_norm": 1.5269598960876465, | |
| "learning_rate": 7.261121624463068e-05, | |
| "epoch": 0.64, | |
| "step": 42425 | |
| }, | |
| { | |
| "loss": 1.1399, | |
| "grad_norm": 1.863671898841858, | |
| "learning_rate": 7.253612087350936e-05, | |
| "epoch": 0.64, | |
| "step": 42450 | |
| }, | |
| { | |
| "loss": 1.1696, | |
| "grad_norm": 1.3562769889831543, | |
| "learning_rate": 7.246102550238804e-05, | |
| "epoch": 0.64, | |
| "step": 42475 | |
| }, | |
| { | |
| "loss": 1.1442, | |
| "grad_norm": 1.9278922080993652, | |
| "learning_rate": 7.23859301312667e-05, | |
| "epoch": 0.64, | |
| "step": 42500 | |
| }, | |
| { | |
| "loss": 1.1339, | |
| "grad_norm": 1.8279402256011963, | |
| "learning_rate": 7.231083476014538e-05, | |
| "epoch": 0.64, | |
| "step": 42525 | |
| }, | |
| { | |
| "loss": 1.1319, | |
| "grad_norm": 1.165343999862671, | |
| "learning_rate": 7.223573938902406e-05, | |
| "epoch": 0.64, | |
| "step": 42550 | |
| }, | |
| { | |
| "loss": 1.0568, | |
| "grad_norm": 1.109803318977356, | |
| "learning_rate": 7.216064401790274e-05, | |
| "epoch": 0.64, | |
| "step": 42575 | |
| }, | |
| { | |
| "loss": 1.214, | |
| "grad_norm": 1.3198269605636597, | |
| "learning_rate": 7.208554864678142e-05, | |
| "epoch": 0.64, | |
| "step": 42600 | |
| }, | |
| { | |
| "loss": 1.1511, | |
| "grad_norm": 1.4255495071411133, | |
| "learning_rate": 7.20104532756601e-05, | |
| "epoch": 0.64, | |
| "step": 42625 | |
| }, | |
| { | |
| "loss": 1.2014, | |
| "grad_norm": 2.5674383640289307, | |
| "learning_rate": 7.193535790453876e-05, | |
| "epoch": 0.64, | |
| "step": 42650 | |
| }, | |
| { | |
| "loss": 1.1177, | |
| "grad_norm": 2.265868663787842, | |
| "learning_rate": 7.186026253341744e-05, | |
| "epoch": 0.64, | |
| "step": 42675 | |
| }, | |
| { | |
| "loss": 1.151, | |
| "grad_norm": 1.6792558431625366, | |
| "learning_rate": 7.178516716229612e-05, | |
| "epoch": 0.64, | |
| "step": 42700 | |
| }, | |
| { | |
| "loss": 1.211, | |
| "grad_norm": 1.5409029722213745, | |
| "learning_rate": 7.171007179117479e-05, | |
| "epoch": 0.64, | |
| "step": 42725 | |
| }, | |
| { | |
| "loss": 1.1475, | |
| "grad_norm": 2.21852970123291, | |
| "learning_rate": 7.163497642005347e-05, | |
| "epoch": 0.64, | |
| "step": 42750 | |
| }, | |
| { | |
| "loss": 1.1559, | |
| "grad_norm": 1.770150065422058, | |
| "learning_rate": 7.155988104893215e-05, | |
| "epoch": 0.64, | |
| "step": 42775 | |
| }, | |
| { | |
| "loss": 1.1443, | |
| "grad_norm": 2.0208752155303955, | |
| "learning_rate": 7.148478567781082e-05, | |
| "epoch": 0.64, | |
| "step": 42800 | |
| }, | |
| { | |
| "loss": 1.1486, | |
| "grad_norm": 1.3952795267105103, | |
| "learning_rate": 7.14096903066895e-05, | |
| "epoch": 0.64, | |
| "step": 42825 | |
| }, | |
| { | |
| "loss": 1.1166, | |
| "grad_norm": 1.055246114730835, | |
| "learning_rate": 7.133459493556818e-05, | |
| "epoch": 0.64, | |
| "step": 42850 | |
| }, | |
| { | |
| "loss": 1.1424, | |
| "grad_norm": 1.9727169275283813, | |
| "learning_rate": 7.125949956444685e-05, | |
| "epoch": 0.64, | |
| "step": 42875 | |
| }, | |
| { | |
| "loss": 1.1111, | |
| "grad_norm": 1.2607585191726685, | |
| "learning_rate": 7.118440419332553e-05, | |
| "epoch": 0.64, | |
| "step": 42900 | |
| }, | |
| { | |
| "loss": 1.1684, | |
| "grad_norm": 1.3263331651687622, | |
| "learning_rate": 7.11093088222042e-05, | |
| "epoch": 0.64, | |
| "step": 42925 | |
| }, | |
| { | |
| "loss": 1.1461, | |
| "grad_norm": 1.7771466970443726, | |
| "learning_rate": 7.103421345108287e-05, | |
| "epoch": 0.65, | |
| "step": 42950 | |
| }, | |
| { | |
| "loss": 1.0846, | |
| "grad_norm": 1.842961311340332, | |
| "learning_rate": 7.095911807996155e-05, | |
| "epoch": 0.65, | |
| "step": 42975 | |
| }, | |
| { | |
| "loss": 1.143, | |
| "grad_norm": 1.8266829252243042, | |
| "learning_rate": 7.088402270884023e-05, | |
| "epoch": 0.65, | |
| "step": 43000 | |
| }, | |
| { | |
| "loss": 1.1259, | |
| "grad_norm": 2.703138828277588, | |
| "learning_rate": 7.08089273377189e-05, | |
| "epoch": 0.65, | |
| "step": 43025 | |
| }, | |
| { | |
| "loss": 1.1592, | |
| "grad_norm": 1.7669565677642822, | |
| "learning_rate": 7.073383196659759e-05, | |
| "epoch": 0.65, | |
| "step": 43050 | |
| }, | |
| { | |
| "loss": 1.157, | |
| "grad_norm": 2.708080530166626, | |
| "learning_rate": 7.065873659547626e-05, | |
| "epoch": 0.65, | |
| "step": 43075 | |
| }, | |
| { | |
| "loss": 1.1409, | |
| "grad_norm": 0.9941558241844177, | |
| "learning_rate": 7.058364122435493e-05, | |
| "epoch": 0.65, | |
| "step": 43100 | |
| }, | |
| { | |
| "loss": 1.09, | |
| "grad_norm": 2.0420825481414795, | |
| "learning_rate": 7.050854585323361e-05, | |
| "epoch": 0.65, | |
| "step": 43125 | |
| }, | |
| { | |
| "loss": 1.1426, | |
| "grad_norm": 2.205864667892456, | |
| "learning_rate": 7.043345048211229e-05, | |
| "epoch": 0.65, | |
| "step": 43150 | |
| }, | |
| { | |
| "loss": 1.0324, | |
| "grad_norm": 1.426829218864441, | |
| "learning_rate": 7.035835511099095e-05, | |
| "epoch": 0.65, | |
| "step": 43175 | |
| }, | |
| { | |
| "loss": 1.0635, | |
| "grad_norm": 2.733449935913086, | |
| "learning_rate": 7.028325973986963e-05, | |
| "epoch": 0.65, | |
| "step": 43200 | |
| }, | |
| { | |
| "loss": 1.0572, | |
| "grad_norm": 1.3944034576416016, | |
| "learning_rate": 7.020816436874832e-05, | |
| "epoch": 0.65, | |
| "step": 43225 | |
| }, | |
| { | |
| "loss": 1.0696, | |
| "grad_norm": 3.0517678260803223, | |
| "learning_rate": 7.013306899762699e-05, | |
| "epoch": 0.65, | |
| "step": 43250 | |
| }, | |
| { | |
| "loss": 1.1799, | |
| "grad_norm": 1.6215708255767822, | |
| "learning_rate": 7.005797362650567e-05, | |
| "epoch": 0.65, | |
| "step": 43275 | |
| }, | |
| { | |
| "loss": 1.2173, | |
| "grad_norm": 1.6113760471343994, | |
| "learning_rate": 6.99858820702292e-05, | |
| "epoch": 0.65, | |
| "step": 43300 | |
| }, | |
| { | |
| "loss": 1.1714, | |
| "grad_norm": 2.9850752353668213, | |
| "learning_rate": 6.991078669910786e-05, | |
| "epoch": 0.65, | |
| "step": 43325 | |
| }, | |
| { | |
| "loss": 1.0623, | |
| "grad_norm": 1.9807687997817993, | |
| "learning_rate": 6.983569132798654e-05, | |
| "epoch": 0.65, | |
| "step": 43350 | |
| }, | |
| { | |
| "loss": 1.1881, | |
| "grad_norm": 1.4547510147094727, | |
| "learning_rate": 6.976059595686523e-05, | |
| "epoch": 0.65, | |
| "step": 43375 | |
| }, | |
| { | |
| "loss": 1.1096, | |
| "grad_norm": 1.768142819404602, | |
| "learning_rate": 6.96855005857439e-05, | |
| "epoch": 0.65, | |
| "step": 43400 | |
| }, | |
| { | |
| "loss": 1.1513, | |
| "grad_norm": 1.4340214729309082, | |
| "learning_rate": 6.961040521462258e-05, | |
| "epoch": 0.65, | |
| "step": 43425 | |
| }, | |
| { | |
| "loss": 1.1259, | |
| "grad_norm": 2.260941743850708, | |
| "learning_rate": 6.953530984350126e-05, | |
| "epoch": 0.65, | |
| "step": 43450 | |
| }, | |
| { | |
| "loss": 1.0911, | |
| "grad_norm": 1.3704345226287842, | |
| "learning_rate": 6.946021447237992e-05, | |
| "epoch": 0.65, | |
| "step": 43475 | |
| }, | |
| { | |
| "loss": 1.1814, | |
| "grad_norm": 2.298049211502075, | |
| "learning_rate": 6.93851191012586e-05, | |
| "epoch": 0.65, | |
| "step": 43500 | |
| }, | |
| { | |
| "loss": 1.127, | |
| "grad_norm": 1.4709994792938232, | |
| "learning_rate": 6.931002373013728e-05, | |
| "epoch": 0.65, | |
| "step": 43525 | |
| }, | |
| { | |
| "loss": 1.0889, | |
| "grad_norm": 1.7260873317718506, | |
| "learning_rate": 6.923492835901594e-05, | |
| "epoch": 0.65, | |
| "step": 43550 | |
| }, | |
| { | |
| "loss": 1.1871, | |
| "grad_norm": 1.912359595298767, | |
| "learning_rate": 6.915983298789464e-05, | |
| "epoch": 0.65, | |
| "step": 43575 | |
| }, | |
| { | |
| "loss": 1.1445, | |
| "grad_norm": 1.4736065864562988, | |
| "learning_rate": 6.908473761677332e-05, | |
| "epoch": 0.65, | |
| "step": 43600 | |
| }, | |
| { | |
| "loss": 1.1363, | |
| "grad_norm": 2.0081377029418945, | |
| "learning_rate": 6.900964224565198e-05, | |
| "epoch": 0.66, | |
| "step": 43625 | |
| }, | |
| { | |
| "loss": 1.1329, | |
| "grad_norm": 1.3601313829421997, | |
| "learning_rate": 6.893454687453066e-05, | |
| "epoch": 0.66, | |
| "step": 43650 | |
| }, | |
| { | |
| "loss": 1.0892, | |
| "grad_norm": 1.9667214155197144, | |
| "learning_rate": 6.885945150340934e-05, | |
| "epoch": 0.66, | |
| "step": 43675 | |
| }, | |
| { | |
| "loss": 1.1483, | |
| "grad_norm": 1.3687251806259155, | |
| "learning_rate": 6.878735994713285e-05, | |
| "epoch": 0.66, | |
| "step": 43700 | |
| }, | |
| { | |
| "loss": 1.0915, | |
| "grad_norm": 2.4071569442749023, | |
| "learning_rate": 6.871226457601155e-05, | |
| "epoch": 0.66, | |
| "step": 43725 | |
| }, | |
| { | |
| "loss": 1.1005, | |
| "grad_norm": 1.577147126197815, | |
| "learning_rate": 6.863716920489021e-05, | |
| "epoch": 0.66, | |
| "step": 43750 | |
| }, | |
| { | |
| "loss": 1.1643, | |
| "grad_norm": 4.050117015838623, | |
| "learning_rate": 6.856207383376889e-05, | |
| "epoch": 0.66, | |
| "step": 43775 | |
| }, | |
| { | |
| "loss": 1.1211, | |
| "grad_norm": 1.6273080110549927, | |
| "learning_rate": 6.848697846264757e-05, | |
| "epoch": 0.66, | |
| "step": 43800 | |
| }, | |
| { | |
| "loss": 1.082, | |
| "grad_norm": 1.4899144172668457, | |
| "learning_rate": 6.841188309152623e-05, | |
| "epoch": 0.66, | |
| "step": 43825 | |
| }, | |
| { | |
| "loss": 1.1292, | |
| "grad_norm": 2.176234006881714, | |
| "learning_rate": 6.833678772040491e-05, | |
| "epoch": 0.66, | |
| "step": 43850 | |
| }, | |
| { | |
| "loss": 1.1189, | |
| "grad_norm": 1.6825004816055298, | |
| "learning_rate": 6.826169234928359e-05, | |
| "epoch": 0.66, | |
| "step": 43875 | |
| }, | |
| { | |
| "loss": 1.0886, | |
| "grad_norm": 1.342085361480713, | |
| "learning_rate": 6.818659697816227e-05, | |
| "epoch": 0.66, | |
| "step": 43900 | |
| }, | |
| { | |
| "loss": 1.146, | |
| "grad_norm": 0.8940933346748352, | |
| "learning_rate": 6.811150160704095e-05, | |
| "epoch": 0.66, | |
| "step": 43925 | |
| }, | |
| { | |
| "loss": 1.0925, | |
| "grad_norm": 1.4835485219955444, | |
| "learning_rate": 6.803640623591963e-05, | |
| "epoch": 0.66, | |
| "step": 43950 | |
| }, | |
| { | |
| "loss": 1.1992, | |
| "grad_norm": 1.9649572372436523, | |
| "learning_rate": 6.79613108647983e-05, | |
| "epoch": 0.66, | |
| "step": 43975 | |
| }, | |
| { | |
| "loss": 1.1081, | |
| "grad_norm": 1.4442362785339355, | |
| "learning_rate": 6.788621549367697e-05, | |
| "epoch": 0.66, | |
| "step": 44000 | |
| }, | |
| { | |
| "loss": 1.1475, | |
| "grad_norm": 3.4296488761901855, | |
| "learning_rate": 6.781112012255565e-05, | |
| "epoch": 0.66, | |
| "step": 44025 | |
| }, | |
| { | |
| "loss": 1.1343, | |
| "grad_norm": 2.3116354942321777, | |
| "learning_rate": 6.773602475143432e-05, | |
| "epoch": 0.66, | |
| "step": 44050 | |
| }, | |
| { | |
| "loss": 1.1686, | |
| "grad_norm": 1.6859666109085083, | |
| "learning_rate": 6.7660929380313e-05, | |
| "epoch": 0.66, | |
| "step": 44075 | |
| }, | |
| { | |
| "loss": 1.1612, | |
| "grad_norm": 1.4094436168670654, | |
| "learning_rate": 6.758583400919168e-05, | |
| "epoch": 0.66, | |
| "step": 44100 | |
| }, | |
| { | |
| "loss": 1.1254, | |
| "grad_norm": 2.2246947288513184, | |
| "learning_rate": 6.751073863807035e-05, | |
| "epoch": 0.66, | |
| "step": 44125 | |
| }, | |
| { | |
| "loss": 1.16, | |
| "grad_norm": 1.2146118879318237, | |
| "learning_rate": 6.743564326694903e-05, | |
| "epoch": 0.66, | |
| "step": 44150 | |
| }, | |
| { | |
| "loss": 1.1512, | |
| "grad_norm": 1.7137471437454224, | |
| "learning_rate": 6.736054789582771e-05, | |
| "epoch": 0.66, | |
| "step": 44175 | |
| }, | |
| { | |
| "loss": 1.0637, | |
| "grad_norm": 1.3975019454956055, | |
| "learning_rate": 6.728545252470638e-05, | |
| "epoch": 0.66, | |
| "step": 44200 | |
| }, | |
| { | |
| "loss": 1.1295, | |
| "grad_norm": 1.0238609313964844, | |
| "learning_rate": 6.721035715358506e-05, | |
| "epoch": 0.66, | |
| "step": 44225 | |
| }, | |
| { | |
| "loss": 1.0978, | |
| "grad_norm": 1.798096776008606, | |
| "learning_rate": 6.713526178246373e-05, | |
| "epoch": 0.66, | |
| "step": 44250 | |
| }, | |
| { | |
| "loss": 1.1513, | |
| "grad_norm": 1.8276596069335938, | |
| "learning_rate": 6.70601664113424e-05, | |
| "epoch": 0.66, | |
| "step": 44275 | |
| }, | |
| { | |
| "loss": 1.1275, | |
| "grad_norm": 2.5206449031829834, | |
| "learning_rate": 6.698507104022108e-05, | |
| "epoch": 0.67, | |
| "step": 44300 | |
| }, | |
| { | |
| "loss": 1.1837, | |
| "grad_norm": 1.401331901550293, | |
| "learning_rate": 6.690997566909976e-05, | |
| "epoch": 0.67, | |
| "step": 44325 | |
| }, | |
| { | |
| "loss": 1.137, | |
| "grad_norm": 1.3737742900848389, | |
| "learning_rate": 6.683488029797844e-05, | |
| "epoch": 0.67, | |
| "step": 44350 | |
| }, | |
| { | |
| "loss": 1.161, | |
| "grad_norm": 1.461982011795044, | |
| "learning_rate": 6.675978492685712e-05, | |
| "epoch": 0.67, | |
| "step": 44375 | |
| }, | |
| { | |
| "loss": 1.1194, | |
| "grad_norm": 1.4352518320083618, | |
| "learning_rate": 6.66846895557358e-05, | |
| "epoch": 0.67, | |
| "step": 44400 | |
| }, | |
| { | |
| "loss": 1.2318, | |
| "grad_norm": 1.9560725688934326, | |
| "learning_rate": 6.660959418461446e-05, | |
| "epoch": 0.67, | |
| "step": 44425 | |
| }, | |
| { | |
| "loss": 1.1218, | |
| "grad_norm": 1.0845290422439575, | |
| "learning_rate": 6.653449881349314e-05, | |
| "epoch": 0.67, | |
| "step": 44450 | |
| }, | |
| { | |
| "loss": 1.2239, | |
| "grad_norm": 1.4423961639404297, | |
| "learning_rate": 6.645940344237182e-05, | |
| "epoch": 0.67, | |
| "step": 44475 | |
| }, | |
| { | |
| "loss": 1.18, | |
| "grad_norm": 2.2228844165802, | |
| "learning_rate": 6.638430807125048e-05, | |
| "epoch": 0.67, | |
| "step": 44500 | |
| }, | |
| { | |
| "loss": 1.1529, | |
| "grad_norm": 1.524857521057129, | |
| "learning_rate": 6.630921270012916e-05, | |
| "epoch": 0.67, | |
| "step": 44525 | |
| }, | |
| { | |
| "loss": 1.121, | |
| "grad_norm": 1.349579930305481, | |
| "learning_rate": 6.623411732900784e-05, | |
| "epoch": 0.67, | |
| "step": 44550 | |
| }, | |
| { | |
| "loss": 1.1108, | |
| "grad_norm": 1.5544127225875854, | |
| "learning_rate": 6.615902195788652e-05, | |
| "epoch": 0.67, | |
| "step": 44575 | |
| }, | |
| { | |
| "loss": 1.1202, | |
| "grad_norm": 1.1992143392562866, | |
| "learning_rate": 6.60839265867652e-05, | |
| "epoch": 0.67, | |
| "step": 44600 | |
| }, | |
| { | |
| "loss": 1.1227, | |
| "grad_norm": 1.020192265510559, | |
| "learning_rate": 6.600883121564388e-05, | |
| "epoch": 0.67, | |
| "step": 44625 | |
| }, | |
| { | |
| "loss": 1.0743, | |
| "grad_norm": 0.9846924543380737, | |
| "learning_rate": 6.593373584452254e-05, | |
| "epoch": 0.67, | |
| "step": 44650 | |
| }, | |
| { | |
| "loss": 1.1423, | |
| "grad_norm": 1.3693522214889526, | |
| "learning_rate": 6.585864047340122e-05, | |
| "epoch": 0.67, | |
| "step": 44675 | |
| }, | |
| { | |
| "loss": 1.1301, | |
| "grad_norm": 1.9597666263580322, | |
| "learning_rate": 6.57835451022799e-05, | |
| "epoch": 0.67, | |
| "step": 44700 | |
| }, | |
| { | |
| "loss": 1.1554, | |
| "grad_norm": 1.1841599941253662, | |
| "learning_rate": 6.570844973115857e-05, | |
| "epoch": 0.67, | |
| "step": 44725 | |
| }, | |
| { | |
| "loss": 1.1822, | |
| "grad_norm": 1.071419596672058, | |
| "learning_rate": 6.563335436003724e-05, | |
| "epoch": 0.67, | |
| "step": 44750 | |
| }, | |
| { | |
| "loss": 1.1234, | |
| "grad_norm": 1.3992092609405518, | |
| "learning_rate": 6.555825898891592e-05, | |
| "epoch": 0.67, | |
| "step": 44775 | |
| }, | |
| { | |
| "loss": 1.089, | |
| "grad_norm": 1.3875787258148193, | |
| "learning_rate": 6.54831636177946e-05, | |
| "epoch": 0.67, | |
| "step": 44800 | |
| }, | |
| { | |
| "loss": 1.1601, | |
| "grad_norm": 1.7100720405578613, | |
| "learning_rate": 6.540806824667328e-05, | |
| "epoch": 0.67, | |
| "step": 44825 | |
| }, | |
| { | |
| "loss": 1.1281, | |
| "grad_norm": 1.3852653503417969, | |
| "learning_rate": 6.533297287555196e-05, | |
| "epoch": 0.67, | |
| "step": 44850 | |
| }, | |
| { | |
| "loss": 1.1651, | |
| "grad_norm": 2.1557846069335938, | |
| "learning_rate": 6.525787750443063e-05, | |
| "epoch": 0.67, | |
| "step": 44875 | |
| }, | |
| { | |
| "loss": 1.1061, | |
| "grad_norm": 1.711010217666626, | |
| "learning_rate": 6.51827821333093e-05, | |
| "epoch": 0.67, | |
| "step": 44900 | |
| }, | |
| { | |
| "loss": 1.1282, | |
| "grad_norm": 1.7825603485107422, | |
| "learning_rate": 6.510768676218798e-05, | |
| "epoch": 0.67, | |
| "step": 44925 | |
| }, | |
| { | |
| "loss": 1.1178, | |
| "grad_norm": 1.6146150827407837, | |
| "learning_rate": 6.503259139106666e-05, | |
| "epoch": 0.68, | |
| "step": 44950 | |
| }, | |
| { | |
| "loss": 1.1025, | |
| "grad_norm": 2.3008651733398438, | |
| "learning_rate": 6.495749601994533e-05, | |
| "epoch": 0.68, | |
| "step": 44975 | |
| }, | |
| { | |
| "loss": 1.1944, | |
| "grad_norm": 1.2394930124282837, | |
| "learning_rate": 6.4882400648824e-05, | |
| "epoch": 0.68, | |
| "step": 45000 | |
| }, | |
| { | |
| "loss": 1.1354, | |
| "grad_norm": 1.3226121664047241, | |
| "learning_rate": 6.480730527770268e-05, | |
| "epoch": 0.68, | |
| "step": 45025 | |
| }, | |
| { | |
| "loss": 1.082, | |
| "grad_norm": 1.0096830129623413, | |
| "learning_rate": 6.473220990658136e-05, | |
| "epoch": 0.68, | |
| "step": 45050 | |
| }, | |
| { | |
| "loss": 1.1808, | |
| "grad_norm": 1.8499752283096313, | |
| "learning_rate": 6.465711453546004e-05, | |
| "epoch": 0.68, | |
| "step": 45075 | |
| }, | |
| { | |
| "loss": 1.1287, | |
| "grad_norm": 1.8377041816711426, | |
| "learning_rate": 6.458201916433871e-05, | |
| "epoch": 0.68, | |
| "step": 45100 | |
| }, | |
| { | |
| "loss": 1.1541, | |
| "grad_norm": 1.5449110269546509, | |
| "learning_rate": 6.450692379321739e-05, | |
| "epoch": 0.68, | |
| "step": 45125 | |
| }, | |
| { | |
| "loss": 1.054, | |
| "grad_norm": 1.7001621723175049, | |
| "learning_rate": 6.443182842209607e-05, | |
| "epoch": 0.68, | |
| "step": 45150 | |
| }, | |
| { | |
| "loss": 1.0684, | |
| "grad_norm": 1.0393651723861694, | |
| "learning_rate": 6.435673305097474e-05, | |
| "epoch": 0.68, | |
| "step": 45175 | |
| }, | |
| { | |
| "loss": 1.1805, | |
| "grad_norm": 1.6634325981140137, | |
| "learning_rate": 6.428163767985341e-05, | |
| "epoch": 0.68, | |
| "step": 45200 | |
| }, | |
| { | |
| "loss": 1.1241, | |
| "grad_norm": 1.3879557847976685, | |
| "learning_rate": 6.42065423087321e-05, | |
| "epoch": 0.68, | |
| "step": 45225 | |
| }, | |
| { | |
| "loss": 1.0822, | |
| "grad_norm": 1.3733693361282349, | |
| "learning_rate": 6.413144693761077e-05, | |
| "epoch": 0.68, | |
| "step": 45250 | |
| }, | |
| { | |
| "loss": 1.1762, | |
| "grad_norm": 1.540552020072937, | |
| "learning_rate": 6.405635156648945e-05, | |
| "epoch": 0.68, | |
| "step": 45275 | |
| }, | |
| { | |
| "loss": 1.1056, | |
| "grad_norm": 1.7979633808135986, | |
| "learning_rate": 6.398125619536813e-05, | |
| "epoch": 0.68, | |
| "step": 45300 | |
| }, | |
| { | |
| "loss": 1.0967, | |
| "grad_norm": 1.601650357246399, | |
| "learning_rate": 6.390616082424679e-05, | |
| "epoch": 0.68, | |
| "step": 45325 | |
| }, | |
| { | |
| "loss": 1.13, | |
| "grad_norm": 1.2980515956878662, | |
| "learning_rate": 6.383106545312547e-05, | |
| "epoch": 0.68, | |
| "step": 45350 | |
| }, | |
| { | |
| "loss": 1.1626, | |
| "grad_norm": 1.1858327388763428, | |
| "learning_rate": 6.375597008200415e-05, | |
| "epoch": 0.68, | |
| "step": 45375 | |
| }, | |
| { | |
| "loss": 1.097, | |
| "grad_norm": 1.3785401582717896, | |
| "learning_rate": 6.368087471088283e-05, | |
| "epoch": 0.68, | |
| "step": 45400 | |
| }, | |
| { | |
| "loss": 1.1721, | |
| "grad_norm": 0.9480896592140198, | |
| "learning_rate": 6.360577933976149e-05, | |
| "epoch": 0.68, | |
| "step": 45425 | |
| }, | |
| { | |
| "loss": 1.1218, | |
| "grad_norm": 1.55319344997406, | |
| "learning_rate": 6.353068396864019e-05, | |
| "epoch": 0.68, | |
| "step": 45450 | |
| }, | |
| { | |
| "loss": 1.1096, | |
| "grad_norm": 1.589424729347229, | |
| "learning_rate": 6.345558859751885e-05, | |
| "epoch": 0.68, | |
| "step": 45475 | |
| }, | |
| { | |
| "loss": 1.1252, | |
| "grad_norm": 1.1488457918167114, | |
| "learning_rate": 6.338049322639753e-05, | |
| "epoch": 0.68, | |
| "step": 45500 | |
| }, | |
| { | |
| "loss": 1.1776, | |
| "grad_norm": 2.43381667137146, | |
| "learning_rate": 6.330539785527621e-05, | |
| "epoch": 0.68, | |
| "step": 45525 | |
| }, | |
| { | |
| "loss": 1.1062, | |
| "grad_norm": 1.4571008682250977, | |
| "learning_rate": 6.323030248415487e-05, | |
| "epoch": 0.68, | |
| "step": 45550 | |
| }, | |
| { | |
| "loss": 1.0569, | |
| "grad_norm": 2.75005841255188, | |
| "learning_rate": 6.315520711303355e-05, | |
| "epoch": 0.68, | |
| "step": 45575 | |
| }, | |
| { | |
| "loss": 1.1457, | |
| "grad_norm": 1.2686755657196045, | |
| "learning_rate": 6.308011174191223e-05, | |
| "epoch": 0.68, | |
| "step": 45600 | |
| }, | |
| { | |
| "loss": 1.1216, | |
| "grad_norm": 1.4540945291519165, | |
| "learning_rate": 6.300501637079091e-05, | |
| "epoch": 0.69, | |
| "step": 45625 | |
| }, | |
| { | |
| "loss": 1.1018, | |
| "grad_norm": 1.2058906555175781, | |
| "learning_rate": 6.292992099966958e-05, | |
| "epoch": 0.69, | |
| "step": 45650 | |
| }, | |
| { | |
| "loss": 1.1387, | |
| "grad_norm": 2.0346646308898926, | |
| "learning_rate": 6.285482562854827e-05, | |
| "epoch": 0.69, | |
| "step": 45675 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 1.4020074605941772, | |
| "learning_rate": 6.277973025742693e-05, | |
| "epoch": 0.69, | |
| "step": 45700 | |
| }, | |
| { | |
| "loss": 1.1201, | |
| "grad_norm": 1.4608796834945679, | |
| "learning_rate": 6.270463488630561e-05, | |
| "epoch": 0.69, | |
| "step": 45725 | |
| }, | |
| { | |
| "loss": 1.1399, | |
| "grad_norm": 1.7157222032546997, | |
| "learning_rate": 6.262953951518429e-05, | |
| "epoch": 0.69, | |
| "step": 45750 | |
| }, | |
| { | |
| "loss": 1.1118, | |
| "grad_norm": 1.1699299812316895, | |
| "learning_rate": 6.255444414406296e-05, | |
| "epoch": 0.69, | |
| "step": 45775 | |
| }, | |
| { | |
| "loss": 1.1491, | |
| "grad_norm": 1.5801405906677246, | |
| "learning_rate": 6.247934877294163e-05, | |
| "epoch": 0.69, | |
| "step": 45800 | |
| }, | |
| { | |
| "loss": 1.1591, | |
| "grad_norm": 2.6088712215423584, | |
| "learning_rate": 6.240425340182031e-05, | |
| "epoch": 0.69, | |
| "step": 45825 | |
| }, | |
| { | |
| "loss": 1.1241, | |
| "grad_norm": 1.1088968515396118, | |
| "learning_rate": 6.232915803069899e-05, | |
| "epoch": 0.69, | |
| "step": 45850 | |
| }, | |
| { | |
| "loss": 1.1937, | |
| "grad_norm": 1.9700263738632202, | |
| "learning_rate": 6.225406265957767e-05, | |
| "epoch": 0.69, | |
| "step": 45875 | |
| }, | |
| { | |
| "loss": 1.0887, | |
| "grad_norm": 1.8540663719177246, | |
| "learning_rate": 6.217896728845635e-05, | |
| "epoch": 0.69, | |
| "step": 45900 | |
| }, | |
| { | |
| "loss": 1.1068, | |
| "grad_norm": 1.1432939767837524, | |
| "learning_rate": 6.210387191733502e-05, | |
| "epoch": 0.69, | |
| "step": 45925 | |
| }, | |
| { | |
| "loss": 1.1209, | |
| "grad_norm": 1.9326074123382568, | |
| "learning_rate": 6.20287765462137e-05, | |
| "epoch": 0.69, | |
| "step": 45950 | |
| }, | |
| { | |
| "loss": 1.1356, | |
| "grad_norm": 1.928639531135559, | |
| "learning_rate": 6.195368117509237e-05, | |
| "epoch": 0.69, | |
| "step": 45975 | |
| }, | |
| { | |
| "loss": 1.1867, | |
| "grad_norm": 1.4352381229400635, | |
| "learning_rate": 6.187858580397104e-05, | |
| "epoch": 0.69, | |
| "step": 46000 | |
| }, | |
| { | |
| "loss": 1.1867, | |
| "grad_norm": 1.4124820232391357, | |
| "learning_rate": 6.180349043284972e-05, | |
| "epoch": 0.69, | |
| "step": 46025 | |
| }, | |
| { | |
| "loss": 1.1144, | |
| "grad_norm": 2.557101249694824, | |
| "learning_rate": 6.17283950617284e-05, | |
| "epoch": 0.69, | |
| "step": 46050 | |
| }, | |
| { | |
| "loss": 1.0894, | |
| "grad_norm": 1.6086410284042358, | |
| "learning_rate": 6.165630350545192e-05, | |
| "epoch": 0.69, | |
| "step": 46075 | |
| }, | |
| { | |
| "loss": 1.1666, | |
| "grad_norm": 1.3408067226409912, | |
| "learning_rate": 6.15812081343306e-05, | |
| "epoch": 0.69, | |
| "step": 46100 | |
| }, | |
| { | |
| "loss": 1.0923, | |
| "grad_norm": 1.303733229637146, | |
| "learning_rate": 6.150611276320928e-05, | |
| "epoch": 0.69, | |
| "step": 46125 | |
| }, | |
| { | |
| "loss": 1.1683, | |
| "grad_norm": 2.2426815032958984, | |
| "learning_rate": 6.143101739208795e-05, | |
| "epoch": 0.69, | |
| "step": 46150 | |
| }, | |
| { | |
| "loss": 1.1749, | |
| "grad_norm": 1.519041657447815, | |
| "learning_rate": 6.135592202096663e-05, | |
| "epoch": 0.69, | |
| "step": 46175 | |
| }, | |
| { | |
| "loss": 1.1571, | |
| "grad_norm": 1.224007248878479, | |
| "learning_rate": 6.12808266498453e-05, | |
| "epoch": 0.69, | |
| "step": 46200 | |
| }, | |
| { | |
| "loss": 1.1916, | |
| "grad_norm": 1.8132357597351074, | |
| "learning_rate": 6.120573127872398e-05, | |
| "epoch": 0.69, | |
| "step": 46225 | |
| }, | |
| { | |
| "loss": 1.122, | |
| "grad_norm": 1.465853214263916, | |
| "learning_rate": 6.113063590760266e-05, | |
| "epoch": 0.69, | |
| "step": 46250 | |
| }, | |
| { | |
| "loss": 1.126, | |
| "grad_norm": 1.07510507106781, | |
| "learning_rate": 6.105554053648134e-05, | |
| "epoch": 0.69, | |
| "step": 46275 | |
| }, | |
| { | |
| "loss": 1.1603, | |
| "grad_norm": 1.3960785865783691, | |
| "learning_rate": 6.0980445165360014e-05, | |
| "epoch": 0.7, | |
| "step": 46300 | |
| }, | |
| { | |
| "loss": 1.1132, | |
| "grad_norm": 1.2278742790222168, | |
| "learning_rate": 6.0905349794238687e-05, | |
| "epoch": 0.7, | |
| "step": 46325 | |
| }, | |
| { | |
| "loss": 1.159, | |
| "grad_norm": 1.0870790481567383, | |
| "learning_rate": 6.083025442311736e-05, | |
| "epoch": 0.7, | |
| "step": 46350 | |
| }, | |
| { | |
| "loss": 1.1108, | |
| "grad_norm": 1.3281099796295166, | |
| "learning_rate": 6.075515905199604e-05, | |
| "epoch": 0.7, | |
| "step": 46375 | |
| }, | |
| { | |
| "loss": 1.1345, | |
| "grad_norm": 1.004966139793396, | |
| "learning_rate": 6.068006368087471e-05, | |
| "epoch": 0.7, | |
| "step": 46400 | |
| }, | |
| { | |
| "loss": 1.1449, | |
| "grad_norm": 1.102554202079773, | |
| "learning_rate": 6.060496830975338e-05, | |
| "epoch": 0.7, | |
| "step": 46425 | |
| }, | |
| { | |
| "loss": 1.1252, | |
| "grad_norm": 1.598632574081421, | |
| "learning_rate": 6.052987293863207e-05, | |
| "epoch": 0.7, | |
| "step": 46450 | |
| }, | |
| { | |
| "loss": 1.1236, | |
| "grad_norm": 1.7185138463974, | |
| "learning_rate": 6.0454777567510746e-05, | |
| "epoch": 0.7, | |
| "step": 46475 | |
| }, | |
| { | |
| "loss": 1.0913, | |
| "grad_norm": 1.3190034627914429, | |
| "learning_rate": 6.037968219638942e-05, | |
| "epoch": 0.7, | |
| "step": 46500 | |
| }, | |
| { | |
| "loss": 1.0544, | |
| "grad_norm": 2.019202947616577, | |
| "learning_rate": 6.03045868252681e-05, | |
| "epoch": 0.7, | |
| "step": 46525 | |
| }, | |
| { | |
| "loss": 1.12, | |
| "grad_norm": 1.3409463167190552, | |
| "learning_rate": 6.022949145414677e-05, | |
| "epoch": 0.7, | |
| "step": 46550 | |
| }, | |
| { | |
| "loss": 1.1206, | |
| "grad_norm": 1.9381085634231567, | |
| "learning_rate": 6.015439608302544e-05, | |
| "epoch": 0.7, | |
| "step": 46575 | |
| }, | |
| { | |
| "loss": 1.0804, | |
| "grad_norm": 1.1460068225860596, | |
| "learning_rate": 6.007930071190412e-05, | |
| "epoch": 0.7, | |
| "step": 46600 | |
| }, | |
| { | |
| "loss": 1.0772, | |
| "grad_norm": 1.3288995027542114, | |
| "learning_rate": 6.000420534078279e-05, | |
| "epoch": 0.7, | |
| "step": 46625 | |
| }, | |
| { | |
| "loss": 1.1105, | |
| "grad_norm": 1.3131263256072998, | |
| "learning_rate": 5.9929109969661464e-05, | |
| "epoch": 0.7, | |
| "step": 46650 | |
| }, | |
| { | |
| "loss": 1.1337, | |
| "grad_norm": 4.209181308746338, | |
| "learning_rate": 5.985401459854016e-05, | |
| "epoch": 0.7, | |
| "step": 46675 | |
| }, | |
| { | |
| "loss": 1.1791, | |
| "grad_norm": 1.737858772277832, | |
| "learning_rate": 5.977891922741883e-05, | |
| "epoch": 0.7, | |
| "step": 46700 | |
| }, | |
| { | |
| "loss": 1.1116, | |
| "grad_norm": 1.5516709089279175, | |
| "learning_rate": 5.97038238562975e-05, | |
| "epoch": 0.7, | |
| "step": 46725 | |
| }, | |
| { | |
| "loss": 1.1036, | |
| "grad_norm": 1.0340015888214111, | |
| "learning_rate": 5.962872848517618e-05, | |
| "epoch": 0.7, | |
| "step": 46750 | |
| }, | |
| { | |
| "loss": 1.1996, | |
| "grad_norm": 2.427554130554199, | |
| "learning_rate": 5.955363311405485e-05, | |
| "epoch": 0.7, | |
| "step": 46775 | |
| }, | |
| { | |
| "loss": 1.195, | |
| "grad_norm": 2.024531602859497, | |
| "learning_rate": 5.9478537742933524e-05, | |
| "epoch": 0.7, | |
| "step": 46800 | |
| }, | |
| { | |
| "loss": 1.2264, | |
| "grad_norm": 1.2301228046417236, | |
| "learning_rate": 5.94034423718122e-05, | |
| "epoch": 0.7, | |
| "step": 46825 | |
| }, | |
| { | |
| "loss": 1.0799, | |
| "grad_norm": 1.2450840473175049, | |
| "learning_rate": 5.9328347000690875e-05, | |
| "epoch": 0.7, | |
| "step": 46850 | |
| }, | |
| { | |
| "loss": 1.1088, | |
| "grad_norm": 1.3164767026901245, | |
| "learning_rate": 5.925325162956956e-05, | |
| "epoch": 0.7, | |
| "step": 46875 | |
| }, | |
| { | |
| "loss": 1.1358, | |
| "grad_norm": 1.6260111331939697, | |
| "learning_rate": 5.917815625844824e-05, | |
| "epoch": 0.7, | |
| "step": 46900 | |
| }, | |
| { | |
| "loss": 1.1555, | |
| "grad_norm": 1.2355117797851562, | |
| "learning_rate": 5.910306088732691e-05, | |
| "epoch": 0.7, | |
| "step": 46925 | |
| }, | |
| { | |
| "loss": 1.1982, | |
| "grad_norm": 1.639583706855774, | |
| "learning_rate": 5.9027965516205584e-05, | |
| "epoch": 0.71, | |
| "step": 46950 | |
| }, | |
| { | |
| "loss": 1.1229, | |
| "grad_norm": 1.5127111673355103, | |
| "learning_rate": 5.895287014508426e-05, | |
| "epoch": 0.71, | |
| "step": 46975 | |
| }, | |
| { | |
| "loss": 1.0991, | |
| "grad_norm": 1.5103808641433716, | |
| "learning_rate": 5.8877774773962935e-05, | |
| "epoch": 0.71, | |
| "step": 47000 | |
| }, | |
| { | |
| "loss": 1.1568, | |
| "grad_norm": 1.9068769216537476, | |
| "learning_rate": 5.880267940284161e-05, | |
| "epoch": 0.71, | |
| "step": 47025 | |
| }, | |
| { | |
| "loss": 1.1621, | |
| "grad_norm": 1.520690679550171, | |
| "learning_rate": 5.8727584031720286e-05, | |
| "epoch": 0.71, | |
| "step": 47050 | |
| }, | |
| { | |
| "loss": 1.148, | |
| "grad_norm": 3.158663749694824, | |
| "learning_rate": 5.865248866059896e-05, | |
| "epoch": 0.71, | |
| "step": 47075 | |
| }, | |
| { | |
| "loss": 1.1379, | |
| "grad_norm": 2.538459300994873, | |
| "learning_rate": 5.857739328947764e-05, | |
| "epoch": 0.71, | |
| "step": 47100 | |
| }, | |
| { | |
| "loss": 1.2194, | |
| "grad_norm": 1.1677653789520264, | |
| "learning_rate": 5.850229791835632e-05, | |
| "epoch": 0.71, | |
| "step": 47125 | |
| }, | |
| { | |
| "loss": 1.1959, | |
| "grad_norm": 1.7188001871109009, | |
| "learning_rate": 5.8427202547234994e-05, | |
| "epoch": 0.71, | |
| "step": 47150 | |
| }, | |
| { | |
| "loss": 1.1502, | |
| "grad_norm": 1.4081776142120361, | |
| "learning_rate": 5.8352107176113666e-05, | |
| "epoch": 0.71, | |
| "step": 47175 | |
| }, | |
| { | |
| "loss": 1.182, | |
| "grad_norm": 1.6037064790725708, | |
| "learning_rate": 5.8277011804992345e-05, | |
| "epoch": 0.71, | |
| "step": 47200 | |
| }, | |
| { | |
| "loss": 1.1552, | |
| "grad_norm": 2.5029053688049316, | |
| "learning_rate": 5.820191643387102e-05, | |
| "epoch": 0.71, | |
| "step": 47225 | |
| }, | |
| { | |
| "loss": 1.0832, | |
| "grad_norm": 1.6237151622772217, | |
| "learning_rate": 5.812682106274969e-05, | |
| "epoch": 0.71, | |
| "step": 47250 | |
| }, | |
| { | |
| "loss": 1.1307, | |
| "grad_norm": 1.8060946464538574, | |
| "learning_rate": 5.805172569162837e-05, | |
| "epoch": 0.71, | |
| "step": 47275 | |
| }, | |
| { | |
| "loss": 1.0566, | |
| "grad_norm": 1.7570223808288574, | |
| "learning_rate": 5.797663032050704e-05, | |
| "epoch": 0.71, | |
| "step": 47300 | |
| }, | |
| { | |
| "loss": 1.1227, | |
| "grad_norm": 0.9945117831230164, | |
| "learning_rate": 5.7901534949385726e-05, | |
| "epoch": 0.71, | |
| "step": 47325 | |
| }, | |
| { | |
| "loss": 1.1882, | |
| "grad_norm": 2.105391025543213, | |
| "learning_rate": 5.7826439578264405e-05, | |
| "epoch": 0.71, | |
| "step": 47350 | |
| }, | |
| { | |
| "loss": 1.1754, | |
| "grad_norm": 2.504192352294922, | |
| "learning_rate": 5.775134420714308e-05, | |
| "epoch": 0.71, | |
| "step": 47375 | |
| }, | |
| { | |
| "loss": 1.0904, | |
| "grad_norm": 1.456892967224121, | |
| "learning_rate": 5.767624883602175e-05, | |
| "epoch": 0.71, | |
| "step": 47400 | |
| }, | |
| { | |
| "loss": 1.1371, | |
| "grad_norm": 1.5804184675216675, | |
| "learning_rate": 5.760115346490043e-05, | |
| "epoch": 0.71, | |
| "step": 47425 | |
| }, | |
| { | |
| "loss": 1.132, | |
| "grad_norm": 2.141827344894409, | |
| "learning_rate": 5.75260580937791e-05, | |
| "epoch": 0.71, | |
| "step": 47450 | |
| }, | |
| { | |
| "loss": 1.142, | |
| "grad_norm": 2.439383029937744, | |
| "learning_rate": 5.745096272265777e-05, | |
| "epoch": 0.71, | |
| "step": 47475 | |
| }, | |
| { | |
| "loss": 1.1004, | |
| "grad_norm": 1.3394807577133179, | |
| "learning_rate": 5.737586735153645e-05, | |
| "epoch": 0.71, | |
| "step": 47500 | |
| }, | |
| { | |
| "loss": 1.0454, | |
| "grad_norm": 1.1147385835647583, | |
| "learning_rate": 5.730077198041514e-05, | |
| "epoch": 0.71, | |
| "step": 47525 | |
| }, | |
| { | |
| "loss": 1.1216, | |
| "grad_norm": 2.184941291809082, | |
| "learning_rate": 5.722567660929381e-05, | |
| "epoch": 0.71, | |
| "step": 47550 | |
| }, | |
| { | |
| "loss": 1.1123, | |
| "grad_norm": 1.4739607572555542, | |
| "learning_rate": 5.715058123817249e-05, | |
| "epoch": 0.71, | |
| "step": 47575 | |
| }, | |
| { | |
| "loss": 1.0541, | |
| "grad_norm": 1.1950966119766235, | |
| "learning_rate": 5.707548586705116e-05, | |
| "epoch": 0.71, | |
| "step": 47600 | |
| }, | |
| { | |
| "loss": 1.0943, | |
| "grad_norm": 1.8516206741333008, | |
| "learning_rate": 5.700039049592983e-05, | |
| "epoch": 0.72, | |
| "step": 47625 | |
| }, | |
| { | |
| "loss": 1.1332, | |
| "grad_norm": 1.2161145210266113, | |
| "learning_rate": 5.692529512480851e-05, | |
| "epoch": 0.72, | |
| "step": 47650 | |
| }, | |
| { | |
| "loss": 1.182, | |
| "grad_norm": 1.776667833328247, | |
| "learning_rate": 5.685019975368718e-05, | |
| "epoch": 0.72, | |
| "step": 47675 | |
| }, | |
| { | |
| "loss": 1.1426, | |
| "grad_norm": 1.0378066301345825, | |
| "learning_rate": 5.6775104382565855e-05, | |
| "epoch": 0.72, | |
| "step": 47700 | |
| }, | |
| { | |
| "loss": 1.1334, | |
| "grad_norm": 1.2625921964645386, | |
| "learning_rate": 5.6700009011444534e-05, | |
| "epoch": 0.72, | |
| "step": 47725 | |
| }, | |
| { | |
| "loss": 1.0911, | |
| "grad_norm": 1.5473499298095703, | |
| "learning_rate": 5.662491364032322e-05, | |
| "epoch": 0.72, | |
| "step": 47750 | |
| }, | |
| { | |
| "loss": 1.1503, | |
| "grad_norm": 2.243377447128296, | |
| "learning_rate": 5.654981826920189e-05, | |
| "epoch": 0.72, | |
| "step": 47775 | |
| }, | |
| { | |
| "loss": 1.1469, | |
| "grad_norm": 1.3182121515274048, | |
| "learning_rate": 5.647472289808057e-05, | |
| "epoch": 0.72, | |
| "step": 47800 | |
| }, | |
| { | |
| "loss": 1.1264, | |
| "grad_norm": 1.2882803678512573, | |
| "learning_rate": 5.639962752695924e-05, | |
| "epoch": 0.72, | |
| "step": 47825 | |
| }, | |
| { | |
| "loss": 1.1511, | |
| "grad_norm": 1.2225452661514282, | |
| "learning_rate": 5.6324532155837915e-05, | |
| "epoch": 0.72, | |
| "step": 47850 | |
| }, | |
| { | |
| "loss": 1.1468, | |
| "grad_norm": 2.1035497188568115, | |
| "learning_rate": 5.6249436784716593e-05, | |
| "epoch": 0.72, | |
| "step": 47875 | |
| }, | |
| { | |
| "loss": 1.1549, | |
| "grad_norm": 1.6698153018951416, | |
| "learning_rate": 5.6174341413595265e-05, | |
| "epoch": 0.72, | |
| "step": 47900 | |
| }, | |
| { | |
| "loss": 1.176, | |
| "grad_norm": 1.939454436302185, | |
| "learning_rate": 5.609924604247394e-05, | |
| "epoch": 0.72, | |
| "step": 47925 | |
| }, | |
| { | |
| "loss": 1.1494, | |
| "grad_norm": 1.100350260734558, | |
| "learning_rate": 5.6024150671352616e-05, | |
| "epoch": 0.72, | |
| "step": 47950 | |
| }, | |
| { | |
| "loss": 1.1401, | |
| "grad_norm": 1.3846749067306519, | |
| "learning_rate": 5.59490553002313e-05, | |
| "epoch": 0.72, | |
| "step": 47975 | |
| }, | |
| { | |
| "loss": 1.1711, | |
| "grad_norm": 1.5334635972976685, | |
| "learning_rate": 5.5873959929109974e-05, | |
| "epoch": 0.72, | |
| "step": 48000 | |
| }, | |
| { | |
| "loss": 1.1391, | |
| "grad_norm": 1.1351529359817505, | |
| "learning_rate": 5.579886455798865e-05, | |
| "epoch": 0.72, | |
| "step": 48025 | |
| }, | |
| { | |
| "loss": 1.1284, | |
| "grad_norm": 1.8126013278961182, | |
| "learning_rate": 5.5723769186867325e-05, | |
| "epoch": 0.72, | |
| "step": 48050 | |
| }, | |
| { | |
| "loss": 1.1845, | |
| "grad_norm": 1.187787652015686, | |
| "learning_rate": 5.5648673815746e-05, | |
| "epoch": 0.72, | |
| "step": 48075 | |
| }, | |
| { | |
| "loss": 1.1659, | |
| "grad_norm": 2.978299140930176, | |
| "learning_rate": 5.5573578444624676e-05, | |
| "epoch": 0.72, | |
| "step": 48100 | |
| }, | |
| { | |
| "loss": 1.1497, | |
| "grad_norm": 1.6019436120986938, | |
| "learning_rate": 5.549848307350335e-05, | |
| "epoch": 0.72, | |
| "step": 48125 | |
| }, | |
| { | |
| "loss": 1.0611, | |
| "grad_norm": 1.380462408065796, | |
| "learning_rate": 5.542338770238202e-05, | |
| "epoch": 0.72, | |
| "step": 48150 | |
| }, | |
| { | |
| "loss": 1.1723, | |
| "grad_norm": 1.732176661491394, | |
| "learning_rate": 5.53482923312607e-05, | |
| "epoch": 0.72, | |
| "step": 48175 | |
| }, | |
| { | |
| "loss": 1.2292, | |
| "grad_norm": 1.4990782737731934, | |
| "learning_rate": 5.5273196960139385e-05, | |
| "epoch": 0.72, | |
| "step": 48200 | |
| }, | |
| { | |
| "loss": 1.1319, | |
| "grad_norm": 1.7996023893356323, | |
| "learning_rate": 5.519810158901806e-05, | |
| "epoch": 0.72, | |
| "step": 48225 | |
| }, | |
| { | |
| "loss": 1.1087, | |
| "grad_norm": 1.8955588340759277, | |
| "learning_rate": 5.5123006217896736e-05, | |
| "epoch": 0.72, | |
| "step": 48250 | |
| }, | |
| { | |
| "loss": 1.1206, | |
| "grad_norm": 1.337724208831787, | |
| "learning_rate": 5.504791084677541e-05, | |
| "epoch": 0.73, | |
| "step": 48275 | |
| }, | |
| { | |
| "loss": 1.1627, | |
| "grad_norm": 1.515310525894165, | |
| "learning_rate": 5.497281547565408e-05, | |
| "epoch": 0.73, | |
| "step": 48300 | |
| }, | |
| { | |
| "loss": 1.1334, | |
| "grad_norm": 1.9032014608383179, | |
| "learning_rate": 5.489772010453276e-05, | |
| "epoch": 0.73, | |
| "step": 48325 | |
| }, | |
| { | |
| "loss": 1.0996, | |
| "grad_norm": 1.8782274723052979, | |
| "learning_rate": 5.482262473341143e-05, | |
| "epoch": 0.73, | |
| "step": 48350 | |
| }, | |
| { | |
| "loss": 1.1287, | |
| "grad_norm": 1.1794263124465942, | |
| "learning_rate": 5.474752936229011e-05, | |
| "epoch": 0.73, | |
| "step": 48375 | |
| }, | |
| { | |
| "loss": 1.1001, | |
| "grad_norm": 2.261495351791382, | |
| "learning_rate": 5.4672433991168795e-05, | |
| "epoch": 0.73, | |
| "step": 48400 | |
| }, | |
| { | |
| "loss": 1.0928, | |
| "grad_norm": 1.380417823791504, | |
| "learning_rate": 5.459733862004747e-05, | |
| "epoch": 0.73, | |
| "step": 48425 | |
| }, | |
| { | |
| "loss": 1.0945, | |
| "grad_norm": 1.0887725353240967, | |
| "learning_rate": 5.452224324892614e-05, | |
| "epoch": 0.73, | |
| "step": 48450 | |
| }, | |
| { | |
| "loss": 1.1247, | |
| "grad_norm": 1.2714431285858154, | |
| "learning_rate": 5.444714787780482e-05, | |
| "epoch": 0.73, | |
| "step": 48475 | |
| }, | |
| { | |
| "loss": 1.0856, | |
| "grad_norm": 1.475818157196045, | |
| "learning_rate": 5.437205250668349e-05, | |
| "epoch": 0.73, | |
| "step": 48500 | |
| }, | |
| { | |
| "loss": 1.0579, | |
| "grad_norm": 1.0250446796417236, | |
| "learning_rate": 5.429695713556216e-05, | |
| "epoch": 0.73, | |
| "step": 48525 | |
| }, | |
| { | |
| "loss": 1.0451, | |
| "grad_norm": 1.4754000902175903, | |
| "learning_rate": 5.422186176444084e-05, | |
| "epoch": 0.73, | |
| "step": 48550 | |
| }, | |
| { | |
| "loss": 1.1335, | |
| "grad_norm": 1.6405061483383179, | |
| "learning_rate": 5.4146766393319514e-05, | |
| "epoch": 0.73, | |
| "step": 48575 | |
| }, | |
| { | |
| "loss": 1.1399, | |
| "grad_norm": 1.1049416065216064, | |
| "learning_rate": 5.407167102219819e-05, | |
| "epoch": 0.73, | |
| "step": 48600 | |
| }, | |
| { | |
| "loss": 1.1112, | |
| "grad_norm": 1.7172602415084839, | |
| "learning_rate": 5.399657565107688e-05, | |
| "epoch": 0.73, | |
| "step": 48625 | |
| }, | |
| { | |
| "loss": 1.1008, | |
| "grad_norm": 1.833646297454834, | |
| "learning_rate": 5.392148027995555e-05, | |
| "epoch": 0.73, | |
| "step": 48650 | |
| }, | |
| { | |
| "loss": 1.1053, | |
| "grad_norm": 1.4659416675567627, | |
| "learning_rate": 5.384638490883422e-05, | |
| "epoch": 0.73, | |
| "step": 48675 | |
| }, | |
| { | |
| "loss": 1.0825, | |
| "grad_norm": 1.6295710802078247, | |
| "learning_rate": 5.37712895377129e-05, | |
| "epoch": 0.73, | |
| "step": 48700 | |
| }, | |
| { | |
| "loss": 1.1665, | |
| "grad_norm": 1.4064006805419922, | |
| "learning_rate": 5.369619416659157e-05, | |
| "epoch": 0.73, | |
| "step": 48725 | |
| }, | |
| { | |
| "loss": 1.0461, | |
| "grad_norm": 1.548251986503601, | |
| "learning_rate": 5.3621098795470245e-05, | |
| "epoch": 0.73, | |
| "step": 48750 | |
| }, | |
| { | |
| "loss": 1.1471, | |
| "grad_norm": 1.6318676471710205, | |
| "learning_rate": 5.3546003424348924e-05, | |
| "epoch": 0.73, | |
| "step": 48775 | |
| }, | |
| { | |
| "loss": 1.1411, | |
| "grad_norm": 1.321753978729248, | |
| "learning_rate": 5.3470908053227596e-05, | |
| "epoch": 0.73, | |
| "step": 48800 | |
| }, | |
| { | |
| "loss": 1.1102, | |
| "grad_norm": 1.167902946472168, | |
| "learning_rate": 5.3395812682106275e-05, | |
| "epoch": 0.73, | |
| "step": 48825 | |
| }, | |
| { | |
| "loss": 1.0909, | |
| "grad_norm": 1.3905353546142578, | |
| "learning_rate": 5.332071731098496e-05, | |
| "epoch": 0.73, | |
| "step": 48850 | |
| }, | |
| { | |
| "loss": 1.1796, | |
| "grad_norm": 1.0988661050796509, | |
| "learning_rate": 5.324562193986363e-05, | |
| "epoch": 0.73, | |
| "step": 48875 | |
| }, | |
| { | |
| "loss": 1.2214, | |
| "grad_norm": 1.348551630973816, | |
| "learning_rate": 5.3170526568742305e-05, | |
| "epoch": 0.73, | |
| "step": 48900 | |
| }, | |
| { | |
| "loss": 1.0649, | |
| "grad_norm": 1.176352858543396, | |
| "learning_rate": 5.3095431197620984e-05, | |
| "epoch": 0.73, | |
| "step": 48925 | |
| }, | |
| { | |
| "loss": 1.1334, | |
| "grad_norm": 1.448281168937683, | |
| "learning_rate": 5.3020335826499656e-05, | |
| "epoch": 0.74, | |
| "step": 48950 | |
| }, | |
| { | |
| "loss": 1.1764, | |
| "grad_norm": 1.7525986433029175, | |
| "learning_rate": 5.294524045537833e-05, | |
| "epoch": 0.74, | |
| "step": 48975 | |
| }, | |
| { | |
| "loss": 1.1873, | |
| "grad_norm": 1.6281651258468628, | |
| "learning_rate": 5.287014508425701e-05, | |
| "epoch": 0.74, | |
| "step": 49000 | |
| }, | |
| { | |
| "loss": 1.1259, | |
| "grad_norm": 1.1979387998580933, | |
| "learning_rate": 5.279504971313568e-05, | |
| "epoch": 0.74, | |
| "step": 49025 | |
| }, | |
| { | |
| "loss": 1.084, | |
| "grad_norm": 1.2359240055084229, | |
| "learning_rate": 5.2719954342014365e-05, | |
| "epoch": 0.74, | |
| "step": 49050 | |
| }, | |
| { | |
| "loss": 1.0872, | |
| "grad_norm": 1.6398156881332397, | |
| "learning_rate": 5.2644858970893044e-05, | |
| "epoch": 0.74, | |
| "step": 49075 | |
| }, | |
| { | |
| "loss": 1.1564, | |
| "grad_norm": 1.6317737102508545, | |
| "learning_rate": 5.2569763599771716e-05, | |
| "epoch": 0.74, | |
| "step": 49100 | |
| }, | |
| { | |
| "loss": 1.1798, | |
| "grad_norm": 1.1270239353179932, | |
| "learning_rate": 5.249466822865039e-05, | |
| "epoch": 0.74, | |
| "step": 49125 | |
| }, | |
| { | |
| "loss": 1.1076, | |
| "grad_norm": 1.504706621170044, | |
| "learning_rate": 5.2419572857529067e-05, | |
| "epoch": 0.74, | |
| "step": 49150 | |
| }, | |
| { | |
| "loss": 1.0761, | |
| "grad_norm": 2.199673891067505, | |
| "learning_rate": 5.234447748640774e-05, | |
| "epoch": 0.74, | |
| "step": 49175 | |
| }, | |
| { | |
| "loss": 1.1395, | |
| "grad_norm": 1.288529396057129, | |
| "learning_rate": 5.226938211528641e-05, | |
| "epoch": 0.74, | |
| "step": 49200 | |
| }, | |
| { | |
| "loss": 1.1776, | |
| "grad_norm": 1.4537650346755981, | |
| "learning_rate": 5.219428674416509e-05, | |
| "epoch": 0.74, | |
| "step": 49225 | |
| }, | |
| { | |
| "loss": 1.1639, | |
| "grad_norm": 2.028994560241699, | |
| "learning_rate": 5.211919137304376e-05, | |
| "epoch": 0.74, | |
| "step": 49250 | |
| }, | |
| { | |
| "loss": 1.0929, | |
| "grad_norm": 1.8188201189041138, | |
| "learning_rate": 5.204409600192245e-05, | |
| "epoch": 0.74, | |
| "step": 49275 | |
| }, | |
| { | |
| "loss": 1.1054, | |
| "grad_norm": 1.5506641864776611, | |
| "learning_rate": 5.1969000630801126e-05, | |
| "epoch": 0.74, | |
| "step": 49300 | |
| }, | |
| { | |
| "loss": 1.0699, | |
| "grad_norm": 1.526999592781067, | |
| "learning_rate": 5.18939052596798e-05, | |
| "epoch": 0.74, | |
| "step": 49325 | |
| }, | |
| { | |
| "loss": 1.1666, | |
| "grad_norm": 1.4270501136779785, | |
| "learning_rate": 5.181880988855847e-05, | |
| "epoch": 0.74, | |
| "step": 49350 | |
| }, | |
| { | |
| "loss": 1.11, | |
| "grad_norm": 1.3699140548706055, | |
| "learning_rate": 5.174371451743715e-05, | |
| "epoch": 0.74, | |
| "step": 49375 | |
| }, | |
| { | |
| "loss": 1.1069, | |
| "grad_norm": 1.468616008758545, | |
| "learning_rate": 5.166861914631582e-05, | |
| "epoch": 0.74, | |
| "step": 49400 | |
| }, | |
| { | |
| "loss": 1.0625, | |
| "grad_norm": 2.158735513687134, | |
| "learning_rate": 5.1593523775194493e-05, | |
| "epoch": 0.74, | |
| "step": 49425 | |
| }, | |
| { | |
| "loss": 1.1034, | |
| "grad_norm": 1.0673458576202393, | |
| "learning_rate": 5.151842840407317e-05, | |
| "epoch": 0.74, | |
| "step": 49450 | |
| }, | |
| { | |
| "loss": 1.1262, | |
| "grad_norm": 1.5599156618118286, | |
| "learning_rate": 5.1443333032951844e-05, | |
| "epoch": 0.74, | |
| "step": 49475 | |
| }, | |
| { | |
| "loss": 1.1222, | |
| "grad_norm": 1.7378026247024536, | |
| "learning_rate": 5.136823766183053e-05, | |
| "epoch": 0.74, | |
| "step": 49500 | |
| }, | |
| { | |
| "loss": 1.2029, | |
| "grad_norm": 2.2808854579925537, | |
| "learning_rate": 5.129314229070921e-05, | |
| "epoch": 0.74, | |
| "step": 49525 | |
| }, | |
| { | |
| "loss": 1.1349, | |
| "grad_norm": 1.3279706239700317, | |
| "learning_rate": 5.121804691958788e-05, | |
| "epoch": 0.74, | |
| "step": 49550 | |
| }, | |
| { | |
| "loss": 1.1291, | |
| "grad_norm": 1.7178452014923096, | |
| "learning_rate": 5.114295154846655e-05, | |
| "epoch": 0.74, | |
| "step": 49575 | |
| }, | |
| { | |
| "loss": 1.1029, | |
| "grad_norm": 1.7041009664535522, | |
| "learning_rate": 5.106785617734523e-05, | |
| "epoch": 0.74, | |
| "step": 49600 | |
| }, | |
| { | |
| "loss": 1.1401, | |
| "grad_norm": 1.7986174821853638, | |
| "learning_rate": 5.0992760806223904e-05, | |
| "epoch": 0.75, | |
| "step": 49625 | |
| }, | |
| { | |
| "loss": 1.095, | |
| "grad_norm": 1.9708353281021118, | |
| "learning_rate": 5.0917665435102576e-05, | |
| "epoch": 0.75, | |
| "step": 49650 | |
| }, | |
| { | |
| "loss": 1.0777, | |
| "grad_norm": 1.675958275794983, | |
| "learning_rate": 5.0842570063981255e-05, | |
| "epoch": 0.75, | |
| "step": 49675 | |
| }, | |
| { | |
| "loss": 1.0473, | |
| "grad_norm": 1.1292997598648071, | |
| "learning_rate": 5.076747469285994e-05, | |
| "epoch": 0.75, | |
| "step": 49700 | |
| }, | |
| { | |
| "loss": 1.1196, | |
| "grad_norm": 1.3241393566131592, | |
| "learning_rate": 5.069237932173861e-05, | |
| "epoch": 0.75, | |
| "step": 49725 | |
| }, | |
| { | |
| "loss": 1.1062, | |
| "grad_norm": 1.148818850517273, | |
| "learning_rate": 5.061728395061729e-05, | |
| "epoch": 0.75, | |
| "step": 49750 | |
| }, | |
| { | |
| "loss": 1.1366, | |
| "grad_norm": 1.6072300672531128, | |
| "learning_rate": 5.0542188579495964e-05, | |
| "epoch": 0.75, | |
| "step": 49775 | |
| }, | |
| { | |
| "loss": 1.062, | |
| "grad_norm": 2.6563072204589844, | |
| "learning_rate": 5.0467093208374636e-05, | |
| "epoch": 0.75, | |
| "step": 49800 | |
| }, | |
| { | |
| "loss": 1.1615, | |
| "grad_norm": 1.5061039924621582, | |
| "learning_rate": 5.0391997837253315e-05, | |
| "epoch": 0.75, | |
| "step": 49825 | |
| }, | |
| { | |
| "loss": 1.1299, | |
| "grad_norm": 1.6999189853668213, | |
| "learning_rate": 5.031690246613199e-05, | |
| "epoch": 0.75, | |
| "step": 49850 | |
| }, | |
| { | |
| "loss": 1.157, | |
| "grad_norm": 1.8330657482147217, | |
| "learning_rate": 5.0241807095010666e-05, | |
| "epoch": 0.75, | |
| "step": 49875 | |
| }, | |
| { | |
| "loss": 1.1452, | |
| "grad_norm": 1.6632230281829834, | |
| "learning_rate": 5.016671172388934e-05, | |
| "epoch": 0.75, | |
| "step": 49900 | |
| }, | |
| { | |
| "loss": 1.1767, | |
| "grad_norm": 1.2450637817382812, | |
| "learning_rate": 5.0091616352768023e-05, | |
| "epoch": 0.75, | |
| "step": 49925 | |
| }, | |
| { | |
| "loss": 1.1398, | |
| "grad_norm": 1.1371906995773315, | |
| "learning_rate": 5.0016520981646695e-05, | |
| "epoch": 0.75, | |
| "step": 49950 | |
| }, | |
| { | |
| "loss": 1.1173, | |
| "grad_norm": 1.1480075120925903, | |
| "learning_rate": 4.9941425610525374e-05, | |
| "epoch": 0.75, | |
| "step": 49975 | |
| }, | |
| { | |
| "loss": 1.0499, | |
| "grad_norm": 1.0953096151351929, | |
| "learning_rate": 4.9866330239404046e-05, | |
| "epoch": 0.75, | |
| "step": 50000 | |
| }, | |
| { | |
| "loss": 1.0824, | |
| "grad_norm": 1.4675097465515137, | |
| "learning_rate": 4.979123486828272e-05, | |
| "epoch": 0.75, | |
| "step": 50025 | |
| }, | |
| { | |
| "loss": 1.1413, | |
| "grad_norm": 1.9132678508758545, | |
| "learning_rate": 4.97161394971614e-05, | |
| "epoch": 0.75, | |
| "step": 50050 | |
| }, | |
| { | |
| "loss": 1.0562, | |
| "grad_norm": 1.3277969360351562, | |
| "learning_rate": 4.9641044126040076e-05, | |
| "epoch": 0.75, | |
| "step": 50075 | |
| }, | |
| { | |
| "loss": 1.1071, | |
| "grad_norm": 1.4362881183624268, | |
| "learning_rate": 4.956594875491875e-05, | |
| "epoch": 0.75, | |
| "step": 50100 | |
| }, | |
| { | |
| "loss": 1.1572, | |
| "grad_norm": 0.8633365631103516, | |
| "learning_rate": 4.949085338379743e-05, | |
| "epoch": 0.75, | |
| "step": 50125 | |
| }, | |
| { | |
| "loss": 1.0881, | |
| "grad_norm": 1.653272271156311, | |
| "learning_rate": 4.94157580126761e-05, | |
| "epoch": 0.75, | |
| "step": 50150 | |
| }, | |
| { | |
| "loss": 1.158, | |
| "grad_norm": 2.0135273933410645, | |
| "learning_rate": 4.934066264155477e-05, | |
| "epoch": 0.75, | |
| "step": 50175 | |
| }, | |
| { | |
| "loss": 1.1024, | |
| "grad_norm": 1.119586706161499, | |
| "learning_rate": 4.926556727043346e-05, | |
| "epoch": 0.75, | |
| "step": 50200 | |
| }, | |
| { | |
| "loss": 1.1258, | |
| "grad_norm": 0.9510914087295532, | |
| "learning_rate": 4.919047189931213e-05, | |
| "epoch": 0.75, | |
| "step": 50225 | |
| }, | |
| { | |
| "loss": 1.2239, | |
| "grad_norm": 1.510134220123291, | |
| "learning_rate": 4.91153765281908e-05, | |
| "epoch": 0.75, | |
| "step": 50250 | |
| }, | |
| { | |
| "loss": 1.0673, | |
| "grad_norm": 1.0487359762191772, | |
| "learning_rate": 4.904028115706948e-05, | |
| "epoch": 0.76, | |
| "step": 50275 | |
| }, | |
| { | |
| "loss": 1.1268, | |
| "grad_norm": 1.5456733703613281, | |
| "learning_rate": 4.896518578594816e-05, | |
| "epoch": 0.76, | |
| "step": 50300 | |
| }, | |
| { | |
| "loss": 1.2115, | |
| "grad_norm": 1.761313557624817, | |
| "learning_rate": 4.889009041482683e-05, | |
| "epoch": 0.76, | |
| "step": 50325 | |
| }, | |
| { | |
| "loss": 1.1656, | |
| "grad_norm": 1.7086901664733887, | |
| "learning_rate": 4.881499504370551e-05, | |
| "epoch": 0.76, | |
| "step": 50350 | |
| }, | |
| { | |
| "loss": 1.0727, | |
| "grad_norm": 1.7276190519332886, | |
| "learning_rate": 4.874290348742904e-05, | |
| "epoch": 0.76, | |
| "step": 50375 | |
| }, | |
| { | |
| "loss": 1.0991, | |
| "grad_norm": 1.3871339559555054, | |
| "learning_rate": 4.866780811630771e-05, | |
| "epoch": 0.76, | |
| "step": 50400 | |
| }, | |
| { | |
| "loss": 1.0951, | |
| "grad_norm": 1.9214799404144287, | |
| "learning_rate": 4.859271274518639e-05, | |
| "epoch": 0.76, | |
| "step": 50425 | |
| }, | |
| { | |
| "loss": 1.1219, | |
| "grad_norm": 1.5749818086624146, | |
| "learning_rate": 4.851761737406507e-05, | |
| "epoch": 0.76, | |
| "step": 50450 | |
| }, | |
| { | |
| "loss": 1.106, | |
| "grad_norm": 4.110257625579834, | |
| "learning_rate": 4.844252200294374e-05, | |
| "epoch": 0.76, | |
| "step": 50475 | |
| }, | |
| { | |
| "loss": 1.0866, | |
| "grad_norm": 1.6532156467437744, | |
| "learning_rate": 4.836742663182241e-05, | |
| "epoch": 0.76, | |
| "step": 50500 | |
| }, | |
| { | |
| "loss": 1.1183, | |
| "grad_norm": 1.485727071762085, | |
| "learning_rate": 4.829233126070109e-05, | |
| "epoch": 0.76, | |
| "step": 50525 | |
| }, | |
| { | |
| "loss": 1.1405, | |
| "grad_norm": 1.4788265228271484, | |
| "learning_rate": 4.821723588957977e-05, | |
| "epoch": 0.76, | |
| "step": 50550 | |
| }, | |
| { | |
| "loss": 1.1491, | |
| "grad_norm": 2.762058973312378, | |
| "learning_rate": 4.814214051845844e-05, | |
| "epoch": 0.76, | |
| "step": 50575 | |
| }, | |
| { | |
| "loss": 1.1483, | |
| "grad_norm": 1.7897312641143799, | |
| "learning_rate": 4.806704514733712e-05, | |
| "epoch": 0.76, | |
| "step": 50600 | |
| }, | |
| { | |
| "loss": 1.0947, | |
| "grad_norm": 1.1446094512939453, | |
| "learning_rate": 4.799194977621579e-05, | |
| "epoch": 0.76, | |
| "step": 50625 | |
| }, | |
| { | |
| "loss": 1.1116, | |
| "grad_norm": 0.997673749923706, | |
| "learning_rate": 4.791685440509447e-05, | |
| "epoch": 0.76, | |
| "step": 50650 | |
| }, | |
| { | |
| "loss": 1.1374, | |
| "grad_norm": 1.243049144744873, | |
| "learning_rate": 4.784175903397315e-05, | |
| "epoch": 0.76, | |
| "step": 50675 | |
| }, | |
| { | |
| "loss": 1.1458, | |
| "grad_norm": 1.5746535062789917, | |
| "learning_rate": 4.776666366285182e-05, | |
| "epoch": 0.76, | |
| "step": 50700 | |
| }, | |
| { | |
| "loss": 1.1191, | |
| "grad_norm": 1.0133841037750244, | |
| "learning_rate": 4.7691568291730495e-05, | |
| "epoch": 0.76, | |
| "step": 50725 | |
| }, | |
| { | |
| "loss": 1.0886, | |
| "grad_norm": 2.07098650932312, | |
| "learning_rate": 4.761647292060918e-05, | |
| "epoch": 0.76, | |
| "step": 50750 | |
| }, | |
| { | |
| "loss": 1.1666, | |
| "grad_norm": 2.41629695892334, | |
| "learning_rate": 4.754137754948785e-05, | |
| "epoch": 0.76, | |
| "step": 50775 | |
| }, | |
| { | |
| "loss": 1.1094, | |
| "grad_norm": 1.2683985233306885, | |
| "learning_rate": 4.7466282178366525e-05, | |
| "epoch": 0.76, | |
| "step": 50800 | |
| }, | |
| { | |
| "loss": 1.1979, | |
| "grad_norm": 2.520268201828003, | |
| "learning_rate": 4.7391186807245204e-05, | |
| "epoch": 0.76, | |
| "step": 50825 | |
| }, | |
| { | |
| "loss": 1.124, | |
| "grad_norm": 1.5505998134613037, | |
| "learning_rate": 4.7316091436123876e-05, | |
| "epoch": 0.76, | |
| "step": 50850 | |
| }, | |
| { | |
| "loss": 1.1447, | |
| "grad_norm": 1.3630999326705933, | |
| "learning_rate": 4.7240996065002555e-05, | |
| "epoch": 0.76, | |
| "step": 50875 | |
| }, | |
| { | |
| "loss": 1.0995, | |
| "grad_norm": 1.36336350440979, | |
| "learning_rate": 4.7165900693881234e-05, | |
| "epoch": 0.76, | |
| "step": 50900 | |
| }, | |
| { | |
| "loss": 1.0873, | |
| "grad_norm": 1.852036952972412, | |
| "learning_rate": 4.7090805322759906e-05, | |
| "epoch": 0.76, | |
| "step": 50925 | |
| }, | |
| { | |
| "loss": 1.1525, | |
| "grad_norm": 1.3926178216934204, | |
| "learning_rate": 4.7015709951638585e-05, | |
| "epoch": 0.77, | |
| "step": 50950 | |
| }, | |
| { | |
| "loss": 1.0773, | |
| "grad_norm": 1.5449055433273315, | |
| "learning_rate": 4.6940614580517263e-05, | |
| "epoch": 0.77, | |
| "step": 50975 | |
| }, | |
| { | |
| "loss": 1.1175, | |
| "grad_norm": 1.1615759134292603, | |
| "learning_rate": 4.6865519209395936e-05, | |
| "epoch": 0.77, | |
| "step": 51000 | |
| }, | |
| { | |
| "loss": 1.1354, | |
| "grad_norm": 1.8657679557800293, | |
| "learning_rate": 4.679042383827461e-05, | |
| "epoch": 0.77, | |
| "step": 51025 | |
| }, | |
| { | |
| "loss": 1.0671, | |
| "grad_norm": 1.0491373538970947, | |
| "learning_rate": 4.6715328467153287e-05, | |
| "epoch": 0.77, | |
| "step": 51050 | |
| }, | |
| { | |
| "loss": 1.0703, | |
| "grad_norm": 1.1459797620773315, | |
| "learning_rate": 4.6640233096031965e-05, | |
| "epoch": 0.77, | |
| "step": 51075 | |
| }, | |
| { | |
| "loss": 1.0243, | |
| "grad_norm": 2.31217098236084, | |
| "learning_rate": 4.656513772491064e-05, | |
| "epoch": 0.77, | |
| "step": 51100 | |
| }, | |
| { | |
| "loss": 1.1454, | |
| "grad_norm": 1.3556299209594727, | |
| "learning_rate": 4.6490042353789316e-05, | |
| "epoch": 0.77, | |
| "step": 51125 | |
| }, | |
| { | |
| "loss": 1.0407, | |
| "grad_norm": 1.560073971748352, | |
| "learning_rate": 4.641494698266799e-05, | |
| "epoch": 0.77, | |
| "step": 51150 | |
| }, | |
| { | |
| "loss": 1.1132, | |
| "grad_norm": 2.1798226833343506, | |
| "learning_rate": 4.633985161154667e-05, | |
| "epoch": 0.77, | |
| "step": 51175 | |
| }, | |
| { | |
| "loss": 1.1708, | |
| "grad_norm": 1.246620774269104, | |
| "learning_rate": 4.6264756240425346e-05, | |
| "epoch": 0.77, | |
| "step": 51200 | |
| }, | |
| { | |
| "loss": 1.1629, | |
| "grad_norm": 1.2318778038024902, | |
| "learning_rate": 4.618966086930402e-05, | |
| "epoch": 0.77, | |
| "step": 51225 | |
| }, | |
| { | |
| "loss": 1.1289, | |
| "grad_norm": 1.7169677019119263, | |
| "learning_rate": 4.611456549818269e-05, | |
| "epoch": 0.77, | |
| "step": 51250 | |
| }, | |
| { | |
| "loss": 1.1142, | |
| "grad_norm": 1.4329315423965454, | |
| "learning_rate": 4.603947012706137e-05, | |
| "epoch": 0.77, | |
| "step": 51275 | |
| }, | |
| { | |
| "loss": 1.0928, | |
| "grad_norm": 1.3384554386138916, | |
| "learning_rate": 4.596437475594005e-05, | |
| "epoch": 0.77, | |
| "step": 51300 | |
| }, | |
| { | |
| "loss": 1.0633, | |
| "grad_norm": 1.2550382614135742, | |
| "learning_rate": 4.588927938481872e-05, | |
| "epoch": 0.77, | |
| "step": 51325 | |
| }, | |
| { | |
| "loss": 1.1229, | |
| "grad_norm": 1.7870949506759644, | |
| "learning_rate": 4.58141840136974e-05, | |
| "epoch": 0.77, | |
| "step": 51350 | |
| }, | |
| { | |
| "loss": 1.0861, | |
| "grad_norm": 1.3869023323059082, | |
| "learning_rate": 4.573908864257607e-05, | |
| "epoch": 0.77, | |
| "step": 51375 | |
| }, | |
| { | |
| "loss": 1.129, | |
| "grad_norm": 1.477853536605835, | |
| "learning_rate": 4.566399327145475e-05, | |
| "epoch": 0.77, | |
| "step": 51400 | |
| }, | |
| { | |
| "loss": 1.1998, | |
| "grad_norm": 1.8617823123931885, | |
| "learning_rate": 4.558889790033343e-05, | |
| "epoch": 0.77, | |
| "step": 51425 | |
| }, | |
| { | |
| "loss": 1.1041, | |
| "grad_norm": 1.5415617227554321, | |
| "learning_rate": 4.55138025292121e-05, | |
| "epoch": 0.77, | |
| "step": 51450 | |
| }, | |
| { | |
| "loss": 1.0528, | |
| "grad_norm": 1.7439303398132324, | |
| "learning_rate": 4.543870715809077e-05, | |
| "epoch": 0.77, | |
| "step": 51475 | |
| }, | |
| { | |
| "loss": 1.1482, | |
| "grad_norm": 1.3494808673858643, | |
| "learning_rate": 4.536361178696945e-05, | |
| "epoch": 0.77, | |
| "step": 51500 | |
| }, | |
| { | |
| "loss": 1.1014, | |
| "grad_norm": 2.855989456176758, | |
| "learning_rate": 4.528851641584813e-05, | |
| "epoch": 0.77, | |
| "step": 51525 | |
| }, | |
| { | |
| "loss": 1.1184, | |
| "grad_norm": 1.9916918277740479, | |
| "learning_rate": 4.52134210447268e-05, | |
| "epoch": 0.77, | |
| "step": 51550 | |
| }, | |
| { | |
| "loss": 1.1653, | |
| "grad_norm": 2.4080750942230225, | |
| "learning_rate": 4.513832567360548e-05, | |
| "epoch": 0.77, | |
| "step": 51575 | |
| }, | |
| { | |
| "loss": 1.0892, | |
| "grad_norm": 2.9220423698425293, | |
| "learning_rate": 4.5063230302484154e-05, | |
| "epoch": 0.77, | |
| "step": 51600 | |
| }, | |
| { | |
| "loss": 1.1106, | |
| "grad_norm": 1.840510368347168, | |
| "learning_rate": 4.498813493136283e-05, | |
| "epoch": 0.78, | |
| "step": 51625 | |
| }, | |
| { | |
| "loss": 1.1684, | |
| "grad_norm": 1.8185040950775146, | |
| "learning_rate": 4.491303956024151e-05, | |
| "epoch": 0.78, | |
| "step": 51650 | |
| }, | |
| { | |
| "loss": 1.1476, | |
| "grad_norm": 1.5126756429672241, | |
| "learning_rate": 4.4837944189120184e-05, | |
| "epoch": 0.78, | |
| "step": 51675 | |
| }, | |
| { | |
| "loss": 1.0972, | |
| "grad_norm": 2.2125062942504883, | |
| "learning_rate": 4.476284881799886e-05, | |
| "epoch": 0.78, | |
| "step": 51700 | |
| }, | |
| { | |
| "loss": 1.1401, | |
| "grad_norm": 2.3873279094696045, | |
| "learning_rate": 4.4687753446877535e-05, | |
| "epoch": 0.78, | |
| "step": 51725 | |
| }, | |
| { | |
| "loss": 1.0909, | |
| "grad_norm": 1.7589930295944214, | |
| "learning_rate": 4.4612658075756213e-05, | |
| "epoch": 0.78, | |
| "step": 51750 | |
| }, | |
| { | |
| "loss": 1.1803, | |
| "grad_norm": 1.4225094318389893, | |
| "learning_rate": 4.4537562704634886e-05, | |
| "epoch": 0.78, | |
| "step": 51775 | |
| }, | |
| { | |
| "loss": 1.1538, | |
| "grad_norm": 1.1532394886016846, | |
| "learning_rate": 4.4462467333513564e-05, | |
| "epoch": 0.78, | |
| "step": 51800 | |
| }, | |
| { | |
| "loss": 1.1152, | |
| "grad_norm": 1.6151081323623657, | |
| "learning_rate": 4.4387371962392237e-05, | |
| "epoch": 0.78, | |
| "step": 51825 | |
| }, | |
| { | |
| "loss": 1.1382, | |
| "grad_norm": 2.63031005859375, | |
| "learning_rate": 4.4312276591270915e-05, | |
| "epoch": 0.78, | |
| "step": 51850 | |
| }, | |
| { | |
| "loss": 1.1601, | |
| "grad_norm": 2.0375289916992188, | |
| "learning_rate": 4.4237181220149594e-05, | |
| "epoch": 0.78, | |
| "step": 51875 | |
| }, | |
| { | |
| "loss": 1.1179, | |
| "grad_norm": 1.1906908750534058, | |
| "learning_rate": 4.4162085849028266e-05, | |
| "epoch": 0.78, | |
| "step": 51900 | |
| }, | |
| { | |
| "loss": 1.1114, | |
| "grad_norm": 1.8762083053588867, | |
| "learning_rate": 4.4086990477906945e-05, | |
| "epoch": 0.78, | |
| "step": 51925 | |
| }, | |
| { | |
| "loss": 1.1274, | |
| "grad_norm": 1.7392081022262573, | |
| "learning_rate": 4.4011895106785624e-05, | |
| "epoch": 0.78, | |
| "step": 51950 | |
| }, | |
| { | |
| "loss": 1.1117, | |
| "grad_norm": 1.480962872505188, | |
| "learning_rate": 4.3936799735664296e-05, | |
| "epoch": 0.78, | |
| "step": 51975 | |
| }, | |
| { | |
| "loss": 1.1623, | |
| "grad_norm": 1.1408778429031372, | |
| "learning_rate": 4.386170436454297e-05, | |
| "epoch": 0.78, | |
| "step": 52000 | |
| }, | |
| { | |
| "loss": 1.0937, | |
| "grad_norm": 2.5744760036468506, | |
| "learning_rate": 4.378660899342165e-05, | |
| "epoch": 0.78, | |
| "step": 52025 | |
| }, | |
| { | |
| "loss": 1.1968, | |
| "grad_norm": 1.890031337738037, | |
| "learning_rate": 4.371151362230032e-05, | |
| "epoch": 0.78, | |
| "step": 52050 | |
| }, | |
| { | |
| "loss": 1.1761, | |
| "grad_norm": 1.9659225940704346, | |
| "learning_rate": 4.3636418251179e-05, | |
| "epoch": 0.78, | |
| "step": 52075 | |
| }, | |
| { | |
| "loss": 1.176, | |
| "grad_norm": 1.4123088121414185, | |
| "learning_rate": 4.356132288005768e-05, | |
| "epoch": 0.78, | |
| "step": 52100 | |
| }, | |
| { | |
| "loss": 1.1493, | |
| "grad_norm": 1.9861273765563965, | |
| "learning_rate": 4.348622750893635e-05, | |
| "epoch": 0.78, | |
| "step": 52125 | |
| }, | |
| { | |
| "loss": 1.1376, | |
| "grad_norm": 1.3282297849655151, | |
| "learning_rate": 4.341113213781503e-05, | |
| "epoch": 0.78, | |
| "step": 52150 | |
| }, | |
| { | |
| "loss": 1.1337, | |
| "grad_norm": 1.8567203283309937, | |
| "learning_rate": 4.333603676669371e-05, | |
| "epoch": 0.78, | |
| "step": 52175 | |
| }, | |
| { | |
| "loss": 1.1678, | |
| "grad_norm": 1.387803077697754, | |
| "learning_rate": 4.326094139557238e-05, | |
| "epoch": 0.78, | |
| "step": 52200 | |
| }, | |
| { | |
| "loss": 1.1369, | |
| "grad_norm": 1.4993411302566528, | |
| "learning_rate": 4.318584602445105e-05, | |
| "epoch": 0.78, | |
| "step": 52225 | |
| }, | |
| { | |
| "loss": 1.1072, | |
| "grad_norm": 1.83002769947052, | |
| "learning_rate": 4.311075065332973e-05, | |
| "epoch": 0.78, | |
| "step": 52250 | |
| }, | |
| { | |
| "loss": 1.1275, | |
| "grad_norm": 1.0453954935073853, | |
| "learning_rate": 4.303565528220841e-05, | |
| "epoch": 0.79, | |
| "step": 52275 | |
| }, | |
| { | |
| "loss": 1.0546, | |
| "grad_norm": 1.6221436262130737, | |
| "learning_rate": 4.296055991108708e-05, | |
| "epoch": 0.79, | |
| "step": 52300 | |
| }, | |
| { | |
| "loss": 1.1474, | |
| "grad_norm": 1.4290169477462769, | |
| "learning_rate": 4.288546453996576e-05, | |
| "epoch": 0.79, | |
| "step": 52325 | |
| }, | |
| { | |
| "loss": 1.0947, | |
| "grad_norm": 2.136678695678711, | |
| "learning_rate": 4.281036916884443e-05, | |
| "epoch": 0.79, | |
| "step": 52350 | |
| }, | |
| { | |
| "loss": 1.1738, | |
| "grad_norm": 1.6790881156921387, | |
| "learning_rate": 4.273527379772311e-05, | |
| "epoch": 0.79, | |
| "step": 52375 | |
| }, | |
| { | |
| "loss": 1.1474, | |
| "grad_norm": 1.1431602239608765, | |
| "learning_rate": 4.266017842660179e-05, | |
| "epoch": 0.79, | |
| "step": 52400 | |
| }, | |
| { | |
| "loss": 1.1478, | |
| "grad_norm": 1.6592998504638672, | |
| "learning_rate": 4.258508305548046e-05, | |
| "epoch": 0.79, | |
| "step": 52425 | |
| }, | |
| { | |
| "loss": 1.0866, | |
| "grad_norm": 3.1507890224456787, | |
| "learning_rate": 4.250998768435914e-05, | |
| "epoch": 0.79, | |
| "step": 52450 | |
| }, | |
| { | |
| "loss": 1.1061, | |
| "grad_norm": 2.271561861038208, | |
| "learning_rate": 4.243489231323781e-05, | |
| "epoch": 0.79, | |
| "step": 52475 | |
| }, | |
| { | |
| "loss": 1.1853, | |
| "grad_norm": 1.3959341049194336, | |
| "learning_rate": 4.235979694211649e-05, | |
| "epoch": 0.79, | |
| "step": 52500 | |
| }, | |
| { | |
| "loss": 1.1685, | |
| "grad_norm": 1.9828035831451416, | |
| "learning_rate": 4.2284701570995164e-05, | |
| "epoch": 0.79, | |
| "step": 52525 | |
| }, | |
| { | |
| "loss": 1.1353, | |
| "grad_norm": 1.348754644393921, | |
| "learning_rate": 4.220960619987384e-05, | |
| "epoch": 0.79, | |
| "step": 52550 | |
| }, | |
| { | |
| "loss": 1.0879, | |
| "grad_norm": 2.036592483520508, | |
| "learning_rate": 4.2134510828752514e-05, | |
| "epoch": 0.79, | |
| "step": 52575 | |
| }, | |
| { | |
| "loss": 1.1763, | |
| "grad_norm": 2.1805171966552734, | |
| "learning_rate": 4.205941545763119e-05, | |
| "epoch": 0.79, | |
| "step": 52600 | |
| }, | |
| { | |
| "loss": 1.0777, | |
| "grad_norm": 1.3825914859771729, | |
| "learning_rate": 4.198432008650987e-05, | |
| "epoch": 0.79, | |
| "step": 52625 | |
| }, | |
| { | |
| "loss": 1.1068, | |
| "grad_norm": 1.282179832458496, | |
| "learning_rate": 4.1909224715388544e-05, | |
| "epoch": 0.79, | |
| "step": 52650 | |
| }, | |
| { | |
| "loss": 1.1387, | |
| "grad_norm": 1.4758460521697998, | |
| "learning_rate": 4.183412934426722e-05, | |
| "epoch": 0.79, | |
| "step": 52675 | |
| }, | |
| { | |
| "loss": 1.1992, | |
| "grad_norm": 1.5327672958374023, | |
| "learning_rate": 4.1759033973145895e-05, | |
| "epoch": 0.79, | |
| "step": 52700 | |
| }, | |
| { | |
| "loss": 1.1051, | |
| "grad_norm": 1.300261378288269, | |
| "learning_rate": 4.1683938602024574e-05, | |
| "epoch": 0.79, | |
| "step": 52725 | |
| }, | |
| { | |
| "loss": 1.1114, | |
| "grad_norm": 1.1575740575790405, | |
| "learning_rate": 4.1608843230903246e-05, | |
| "epoch": 0.79, | |
| "step": 52750 | |
| }, | |
| { | |
| "loss": 1.1132, | |
| "grad_norm": 1.5386431217193604, | |
| "learning_rate": 4.1533747859781925e-05, | |
| "epoch": 0.79, | |
| "step": 52775 | |
| }, | |
| { | |
| "loss": 1.1619, | |
| "grad_norm": 1.5786181688308716, | |
| "learning_rate": 4.14586524886606e-05, | |
| "epoch": 0.79, | |
| "step": 52800 | |
| }, | |
| { | |
| "loss": 1.1353, | |
| "grad_norm": 2.4933083057403564, | |
| "learning_rate": 4.1383557117539276e-05, | |
| "epoch": 0.79, | |
| "step": 52825 | |
| }, | |
| { | |
| "loss": 1.1693, | |
| "grad_norm": 2.640106439590454, | |
| "learning_rate": 4.1308461746417955e-05, | |
| "epoch": 0.79, | |
| "step": 52850 | |
| }, | |
| { | |
| "loss": 1.1121, | |
| "grad_norm": 1.0569820404052734, | |
| "learning_rate": 4.123336637529663e-05, | |
| "epoch": 0.79, | |
| "step": 52875 | |
| }, | |
| { | |
| "loss": 1.0855, | |
| "grad_norm": 1.2210026979446411, | |
| "learning_rate": 4.1158271004175306e-05, | |
| "epoch": 0.79, | |
| "step": 52900 | |
| }, | |
| { | |
| "loss": 1.1511, | |
| "grad_norm": 1.5878472328186035, | |
| "learning_rate": 4.1083175633053985e-05, | |
| "epoch": 0.79, | |
| "step": 52925 | |
| }, | |
| { | |
| "loss": 1.1387, | |
| "grad_norm": 1.8040430545806885, | |
| "learning_rate": 4.100808026193266e-05, | |
| "epoch": 0.8, | |
| "step": 52950 | |
| }, | |
| { | |
| "loss": 1.1351, | |
| "grad_norm": 1.5732437372207642, | |
| "learning_rate": 4.093298489081133e-05, | |
| "epoch": 0.8, | |
| "step": 52975 | |
| }, | |
| { | |
| "loss": 1.1317, | |
| "grad_norm": 1.9610670804977417, | |
| "learning_rate": 4.085788951969001e-05, | |
| "epoch": 0.8, | |
| "step": 53000 | |
| }, | |
| { | |
| "loss": 1.1201, | |
| "grad_norm": 1.1342912912368774, | |
| "learning_rate": 4.078279414856868e-05, | |
| "epoch": 0.8, | |
| "step": 53025 | |
| }, | |
| { | |
| "loss": 1.1302, | |
| "grad_norm": 1.7887520790100098, | |
| "learning_rate": 4.070769877744736e-05, | |
| "epoch": 0.8, | |
| "step": 53050 | |
| }, | |
| { | |
| "loss": 1.1894, | |
| "grad_norm": 2.0694801807403564, | |
| "learning_rate": 4.063260340632604e-05, | |
| "epoch": 0.8, | |
| "step": 53075 | |
| }, | |
| { | |
| "loss": 1.106, | |
| "grad_norm": 1.582124948501587, | |
| "learning_rate": 4.055750803520471e-05, | |
| "epoch": 0.8, | |
| "step": 53100 | |
| }, | |
| { | |
| "loss": 1.1535, | |
| "grad_norm": 0.8474487066268921, | |
| "learning_rate": 4.048241266408339e-05, | |
| "epoch": 0.8, | |
| "step": 53125 | |
| }, | |
| { | |
| "loss": 1.1005, | |
| "grad_norm": 1.3499822616577148, | |
| "learning_rate": 4.041032110780692e-05, | |
| "epoch": 0.8, | |
| "step": 53150 | |
| }, | |
| { | |
| "loss": 1.1252, | |
| "grad_norm": 3.777379274368286, | |
| "learning_rate": 4.0335225736685596e-05, | |
| "epoch": 0.8, | |
| "step": 53175 | |
| }, | |
| { | |
| "loss": 1.0519, | |
| "grad_norm": 1.1216979026794434, | |
| "learning_rate": 4.026013036556427e-05, | |
| "epoch": 0.8, | |
| "step": 53200 | |
| }, | |
| { | |
| "loss": 1.1814, | |
| "grad_norm": 1.710165023803711, | |
| "learning_rate": 4.018503499444295e-05, | |
| "epoch": 0.8, | |
| "step": 53225 | |
| }, | |
| { | |
| "loss": 1.1011, | |
| "grad_norm": 1.4310054779052734, | |
| "learning_rate": 4.010993962332162e-05, | |
| "epoch": 0.8, | |
| "step": 53250 | |
| }, | |
| { | |
| "loss": 1.1253, | |
| "grad_norm": 1.3393102884292603, | |
| "learning_rate": 4.00348442522003e-05, | |
| "epoch": 0.8, | |
| "step": 53275 | |
| }, | |
| { | |
| "loss": 1.2024, | |
| "grad_norm": 0.9497338533401489, | |
| "learning_rate": 3.995974888107897e-05, | |
| "epoch": 0.8, | |
| "step": 53300 | |
| }, | |
| { | |
| "loss": 1.1295, | |
| "grad_norm": 1.777761697769165, | |
| "learning_rate": 3.988465350995765e-05, | |
| "epoch": 0.8, | |
| "step": 53325 | |
| }, | |
| { | |
| "loss": 1.0795, | |
| "grad_norm": 1.3849236965179443, | |
| "learning_rate": 3.980955813883632e-05, | |
| "epoch": 0.8, | |
| "step": 53350 | |
| }, | |
| { | |
| "loss": 1.0983, | |
| "grad_norm": 1.8323969841003418, | |
| "learning_rate": 3.9734462767715e-05, | |
| "epoch": 0.8, | |
| "step": 53375 | |
| }, | |
| { | |
| "loss": 1.1308, | |
| "grad_norm": 1.5328776836395264, | |
| "learning_rate": 3.965936739659368e-05, | |
| "epoch": 0.8, | |
| "step": 53400 | |
| }, | |
| { | |
| "loss": 1.1389, | |
| "grad_norm": 1.2152605056762695, | |
| "learning_rate": 3.958427202547235e-05, | |
| "epoch": 0.8, | |
| "step": 53425 | |
| }, | |
| { | |
| "loss": 1.1156, | |
| "grad_norm": 1.0716402530670166, | |
| "learning_rate": 3.950917665435103e-05, | |
| "epoch": 0.8, | |
| "step": 53450 | |
| }, | |
| { | |
| "loss": 1.0923, | |
| "grad_norm": 1.3493958711624146, | |
| "learning_rate": 3.94340812832297e-05, | |
| "epoch": 0.8, | |
| "step": 53475 | |
| }, | |
| { | |
| "loss": 1.0859, | |
| "grad_norm": 1.8882994651794434, | |
| "learning_rate": 3.935898591210838e-05, | |
| "epoch": 0.8, | |
| "step": 53500 | |
| }, | |
| { | |
| "loss": 1.0888, | |
| "grad_norm": 1.1161054372787476, | |
| "learning_rate": 3.928389054098706e-05, | |
| "epoch": 0.8, | |
| "step": 53525 | |
| }, | |
| { | |
| "loss": 1.1155, | |
| "grad_norm": 1.3479957580566406, | |
| "learning_rate": 3.920879516986573e-05, | |
| "epoch": 0.8, | |
| "step": 53550 | |
| }, | |
| { | |
| "loss": 1.135, | |
| "grad_norm": 1.2609208822250366, | |
| "learning_rate": 3.9133699798744404e-05, | |
| "epoch": 0.8, | |
| "step": 53575 | |
| }, | |
| { | |
| "loss": 1.0736, | |
| "grad_norm": 1.8553820848464966, | |
| "learning_rate": 3.905860442762308e-05, | |
| "epoch": 0.81, | |
| "step": 53600 | |
| }, | |
| { | |
| "loss": 1.091, | |
| "grad_norm": 1.7198560237884521, | |
| "learning_rate": 3.898350905650176e-05, | |
| "epoch": 0.81, | |
| "step": 53625 | |
| }, | |
| { | |
| "loss": 1.1926, | |
| "grad_norm": 1.0929125547409058, | |
| "learning_rate": 3.8908413685380433e-05, | |
| "epoch": 0.81, | |
| "step": 53650 | |
| }, | |
| { | |
| "loss": 1.1161, | |
| "grad_norm": 1.4651769399642944, | |
| "learning_rate": 3.883331831425911e-05, | |
| "epoch": 0.81, | |
| "step": 53675 | |
| }, | |
| { | |
| "loss": 1.1704, | |
| "grad_norm": 2.1259841918945312, | |
| "learning_rate": 3.8758222943137784e-05, | |
| "epoch": 0.81, | |
| "step": 53700 | |
| }, | |
| { | |
| "loss": 1.1024, | |
| "grad_norm": 1.6856151819229126, | |
| "learning_rate": 3.868312757201646e-05, | |
| "epoch": 0.81, | |
| "step": 53725 | |
| }, | |
| { | |
| "loss": 1.1725, | |
| "grad_norm": 1.9457602500915527, | |
| "learning_rate": 3.860803220089514e-05, | |
| "epoch": 0.81, | |
| "step": 53750 | |
| }, | |
| { | |
| "loss": 1.1107, | |
| "grad_norm": 1.1278740167617798, | |
| "learning_rate": 3.8532936829773814e-05, | |
| "epoch": 0.81, | |
| "step": 53775 | |
| }, | |
| { | |
| "loss": 1.0866, | |
| "grad_norm": 1.989402174949646, | |
| "learning_rate": 3.8457841458652486e-05, | |
| "epoch": 0.81, | |
| "step": 53800 | |
| }, | |
| { | |
| "loss": 1.09, | |
| "grad_norm": 2.686849355697632, | |
| "learning_rate": 3.8382746087531165e-05, | |
| "epoch": 0.81, | |
| "step": 53825 | |
| }, | |
| { | |
| "loss": 1.0897, | |
| "grad_norm": 2.199162244796753, | |
| "learning_rate": 3.8307650716409844e-05, | |
| "epoch": 0.81, | |
| "step": 53850 | |
| }, | |
| { | |
| "loss": 1.1071, | |
| "grad_norm": 0.9810658693313599, | |
| "learning_rate": 3.8232555345288516e-05, | |
| "epoch": 0.81, | |
| "step": 53875 | |
| }, | |
| { | |
| "loss": 1.1032, | |
| "grad_norm": 1.1024478673934937, | |
| "learning_rate": 3.8157459974167195e-05, | |
| "epoch": 0.81, | |
| "step": 53900 | |
| }, | |
| { | |
| "loss": 1.0515, | |
| "grad_norm": 1.8465054035186768, | |
| "learning_rate": 3.8082364603045874e-05, | |
| "epoch": 0.81, | |
| "step": 53925 | |
| }, | |
| { | |
| "loss": 1.0606, | |
| "grad_norm": 0.9782311320304871, | |
| "learning_rate": 3.8007269231924546e-05, | |
| "epoch": 0.81, | |
| "step": 53950 | |
| }, | |
| { | |
| "loss": 1.1054, | |
| "grad_norm": 1.4638195037841797, | |
| "learning_rate": 3.7932173860803225e-05, | |
| "epoch": 0.81, | |
| "step": 53975 | |
| }, | |
| { | |
| "loss": 1.1584, | |
| "grad_norm": 2.066131114959717, | |
| "learning_rate": 3.78570784896819e-05, | |
| "epoch": 0.81, | |
| "step": 54000 | |
| }, | |
| { | |
| "loss": 1.1254, | |
| "grad_norm": 1.6561390161514282, | |
| "learning_rate": 3.778198311856057e-05, | |
| "epoch": 0.81, | |
| "step": 54025 | |
| }, | |
| { | |
| "loss": 1.1213, | |
| "grad_norm": 1.693764090538025, | |
| "learning_rate": 3.770688774743925e-05, | |
| "epoch": 0.81, | |
| "step": 54050 | |
| }, | |
| { | |
| "loss": 1.0754, | |
| "grad_norm": 1.5490859746932983, | |
| "learning_rate": 3.763179237631793e-05, | |
| "epoch": 0.81, | |
| "step": 54075 | |
| }, | |
| { | |
| "loss": 1.1704, | |
| "grad_norm": 1.7576946020126343, | |
| "learning_rate": 3.75566970051966e-05, | |
| "epoch": 0.81, | |
| "step": 54100 | |
| }, | |
| { | |
| "loss": 1.1473, | |
| "grad_norm": 1.8954912424087524, | |
| "learning_rate": 3.748160163407528e-05, | |
| "epoch": 0.81, | |
| "step": 54125 | |
| }, | |
| { | |
| "loss": 1.1391, | |
| "grad_norm": 2.0620269775390625, | |
| "learning_rate": 3.7406506262953957e-05, | |
| "epoch": 0.81, | |
| "step": 54150 | |
| }, | |
| { | |
| "loss": 1.1656, | |
| "grad_norm": 1.3634029626846313, | |
| "learning_rate": 3.733141089183263e-05, | |
| "epoch": 0.81, | |
| "step": 54175 | |
| }, | |
| { | |
| "loss": 1.1246, | |
| "grad_norm": 1.4298192262649536, | |
| "learning_rate": 3.725631552071131e-05, | |
| "epoch": 0.81, | |
| "step": 54200 | |
| }, | |
| { | |
| "loss": 1.1954, | |
| "grad_norm": 1.760016679763794, | |
| "learning_rate": 3.718122014958998e-05, | |
| "epoch": 0.81, | |
| "step": 54225 | |
| }, | |
| { | |
| "loss": 1.1634, | |
| "grad_norm": 1.460942268371582, | |
| "learning_rate": 3.710612477846866e-05, | |
| "epoch": 0.81, | |
| "step": 54250 | |
| }, | |
| { | |
| "loss": 1.0715, | |
| "grad_norm": 0.9953238368034363, | |
| "learning_rate": 3.703102940734734e-05, | |
| "epoch": 0.82, | |
| "step": 54275 | |
| }, | |
| { | |
| "loss": 1.1588, | |
| "grad_norm": 1.3567308187484741, | |
| "learning_rate": 3.695593403622601e-05, | |
| "epoch": 0.82, | |
| "step": 54300 | |
| }, | |
| { | |
| "loss": 1.1086, | |
| "grad_norm": 1.4111878871917725, | |
| "learning_rate": 3.688083866510468e-05, | |
| "epoch": 0.82, | |
| "step": 54325 | |
| }, | |
| { | |
| "loss": 1.161, | |
| "grad_norm": 1.7530951499938965, | |
| "learning_rate": 3.680574329398336e-05, | |
| "epoch": 0.82, | |
| "step": 54350 | |
| }, | |
| { | |
| "loss": 1.1003, | |
| "grad_norm": 1.5563117265701294, | |
| "learning_rate": 3.673064792286204e-05, | |
| "epoch": 0.82, | |
| "step": 54375 | |
| }, | |
| { | |
| "loss": 1.163, | |
| "grad_norm": 1.0254262685775757, | |
| "learning_rate": 3.665555255174071e-05, | |
| "epoch": 0.82, | |
| "step": 54400 | |
| }, | |
| { | |
| "loss": 1.1384, | |
| "grad_norm": 2.547769784927368, | |
| "learning_rate": 3.658045718061939e-05, | |
| "epoch": 0.82, | |
| "step": 54425 | |
| }, | |
| { | |
| "loss": 1.107, | |
| "grad_norm": 1.0468461513519287, | |
| "learning_rate": 3.650536180949806e-05, | |
| "epoch": 0.82, | |
| "step": 54450 | |
| }, | |
| { | |
| "loss": 1.1431, | |
| "grad_norm": 1.1783130168914795, | |
| "learning_rate": 3.643026643837674e-05, | |
| "epoch": 0.82, | |
| "step": 54475 | |
| }, | |
| { | |
| "loss": 1.1398, | |
| "grad_norm": 1.3592449426651, | |
| "learning_rate": 3.635517106725542e-05, | |
| "epoch": 0.82, | |
| "step": 54500 | |
| }, | |
| { | |
| "loss": 1.0942, | |
| "grad_norm": 1.687246322631836, | |
| "learning_rate": 3.628007569613409e-05, | |
| "epoch": 0.82, | |
| "step": 54525 | |
| }, | |
| { | |
| "loss": 1.0334, | |
| "grad_norm": 1.396044373512268, | |
| "learning_rate": 3.6204980325012764e-05, | |
| "epoch": 0.82, | |
| "step": 54550 | |
| }, | |
| { | |
| "loss": 1.0524, | |
| "grad_norm": 2.7093379497528076, | |
| "learning_rate": 3.612988495389144e-05, | |
| "epoch": 0.82, | |
| "step": 54575 | |
| }, | |
| { | |
| "loss": 1.1404, | |
| "grad_norm": 1.8118054866790771, | |
| "learning_rate": 3.605478958277012e-05, | |
| "epoch": 0.82, | |
| "step": 54600 | |
| }, | |
| { | |
| "loss": 1.0971, | |
| "grad_norm": 1.241155982017517, | |
| "learning_rate": 3.5979694211648794e-05, | |
| "epoch": 0.82, | |
| "step": 54625 | |
| }, | |
| { | |
| "loss": 1.0957, | |
| "grad_norm": 1.7652029991149902, | |
| "learning_rate": 3.590459884052747e-05, | |
| "epoch": 0.82, | |
| "step": 54650 | |
| }, | |
| { | |
| "loss": 1.1159, | |
| "grad_norm": 2.1295764446258545, | |
| "learning_rate": 3.5829503469406145e-05, | |
| "epoch": 0.82, | |
| "step": 54675 | |
| }, | |
| { | |
| "loss": 1.1132, | |
| "grad_norm": 1.4499530792236328, | |
| "learning_rate": 3.5754408098284824e-05, | |
| "epoch": 0.82, | |
| "step": 54700 | |
| }, | |
| { | |
| "loss": 1.1356, | |
| "grad_norm": 1.3263312578201294, | |
| "learning_rate": 3.56793127271635e-05, | |
| "epoch": 0.82, | |
| "step": 54725 | |
| }, | |
| { | |
| "loss": 1.0752, | |
| "grad_norm": 1.5875509977340698, | |
| "learning_rate": 3.5604217356042175e-05, | |
| "epoch": 0.82, | |
| "step": 54750 | |
| }, | |
| { | |
| "loss": 1.1495, | |
| "grad_norm": 1.2747198343276978, | |
| "learning_rate": 3.552912198492085e-05, | |
| "epoch": 0.82, | |
| "step": 54775 | |
| }, | |
| { | |
| "loss": 1.0794, | |
| "grad_norm": 3.039198637008667, | |
| "learning_rate": 3.5454026613799526e-05, | |
| "epoch": 0.82, | |
| "step": 54800 | |
| }, | |
| { | |
| "loss": 1.0468, | |
| "grad_norm": 1.2731279134750366, | |
| "learning_rate": 3.5378931242678205e-05, | |
| "epoch": 0.82, | |
| "step": 54825 | |
| }, | |
| { | |
| "loss": 1.0554, | |
| "grad_norm": 2.2691845893859863, | |
| "learning_rate": 3.530383587155688e-05, | |
| "epoch": 0.82, | |
| "step": 54850 | |
| }, | |
| { | |
| "loss": 1.1324, | |
| "grad_norm": 1.3395280838012695, | |
| "learning_rate": 3.5228740500435556e-05, | |
| "epoch": 0.82, | |
| "step": 54875 | |
| }, | |
| { | |
| "loss": 1.036, | |
| "grad_norm": 1.1065006256103516, | |
| "learning_rate": 3.5153645129314235e-05, | |
| "epoch": 0.82, | |
| "step": 54900 | |
| }, | |
| { | |
| "loss": 1.05, | |
| "grad_norm": 1.7697545289993286, | |
| "learning_rate": 3.507854975819291e-05, | |
| "epoch": 0.82, | |
| "step": 54925 | |
| }, | |
| { | |
| "loss": 1.1362, | |
| "grad_norm": 1.0653153657913208, | |
| "learning_rate": 3.5003454387071585e-05, | |
| "epoch": 0.83, | |
| "step": 54950 | |
| }, | |
| { | |
| "loss": 1.0546, | |
| "grad_norm": 1.5754921436309814, | |
| "learning_rate": 3.492835901595026e-05, | |
| "epoch": 0.83, | |
| "step": 54975 | |
| }, | |
| { | |
| "loss": 0.9954, | |
| "grad_norm": 1.166438341140747, | |
| "learning_rate": 3.485326364482893e-05, | |
| "epoch": 0.83, | |
| "step": 55000 | |
| }, | |
| { | |
| "loss": 1.0758, | |
| "grad_norm": 1.2350513935089111, | |
| "learning_rate": 3.4778168273707615e-05, | |
| "epoch": 0.83, | |
| "step": 55025 | |
| }, | |
| { | |
| "loss": 1.0664, | |
| "grad_norm": 1.1638765335083008, | |
| "learning_rate": 3.470307290258629e-05, | |
| "epoch": 0.83, | |
| "step": 55050 | |
| }, | |
| { | |
| "loss": 1.1558, | |
| "grad_norm": 1.0206239223480225, | |
| "learning_rate": 3.462797753146496e-05, | |
| "epoch": 0.83, | |
| "step": 55075 | |
| }, | |
| { | |
| "loss": 1.1298, | |
| "grad_norm": 1.1066503524780273, | |
| "learning_rate": 3.455288216034364e-05, | |
| "epoch": 0.83, | |
| "step": 55100 | |
| }, | |
| { | |
| "loss": 1.114, | |
| "grad_norm": 1.242811918258667, | |
| "learning_rate": 3.447778678922232e-05, | |
| "epoch": 0.83, | |
| "step": 55125 | |
| }, | |
| { | |
| "loss": 1.0795, | |
| "grad_norm": 0.8851810097694397, | |
| "learning_rate": 3.440269141810099e-05, | |
| "epoch": 0.83, | |
| "step": 55150 | |
| }, | |
| { | |
| "loss": 1.1065, | |
| "grad_norm": 1.7904212474822998, | |
| "learning_rate": 3.432759604697967e-05, | |
| "epoch": 0.83, | |
| "step": 55175 | |
| }, | |
| { | |
| "loss": 1.013, | |
| "grad_norm": 1.2688441276550293, | |
| "learning_rate": 3.425250067585834e-05, | |
| "epoch": 0.83, | |
| "step": 55200 | |
| }, | |
| { | |
| "loss": 1.1373, | |
| "grad_norm": 1.1899800300598145, | |
| "learning_rate": 3.417740530473702e-05, | |
| "epoch": 0.83, | |
| "step": 55225 | |
| }, | |
| { | |
| "loss": 1.1135, | |
| "grad_norm": 1.9331419467926025, | |
| "learning_rate": 3.41023099336157e-05, | |
| "epoch": 0.83, | |
| "step": 55250 | |
| }, | |
| { | |
| "loss": 1.0993, | |
| "grad_norm": 1.6777852773666382, | |
| "learning_rate": 3.402721456249437e-05, | |
| "epoch": 0.83, | |
| "step": 55275 | |
| }, | |
| { | |
| "loss": 1.0528, | |
| "grad_norm": 1.469103217124939, | |
| "learning_rate": 3.395211919137304e-05, | |
| "epoch": 0.83, | |
| "step": 55300 | |
| }, | |
| { | |
| "loss": 1.1432, | |
| "grad_norm": 1.6784319877624512, | |
| "learning_rate": 3.387702382025172e-05, | |
| "epoch": 0.83, | |
| "step": 55325 | |
| }, | |
| { | |
| "loss": 1.0986, | |
| "grad_norm": 1.4824069738388062, | |
| "learning_rate": 3.38019284491304e-05, | |
| "epoch": 0.83, | |
| "step": 55350 | |
| }, | |
| { | |
| "loss": 1.1599, | |
| "grad_norm": 1.1543418169021606, | |
| "learning_rate": 3.372683307800907e-05, | |
| "epoch": 0.83, | |
| "step": 55375 | |
| }, | |
| { | |
| "loss": 1.0552, | |
| "grad_norm": 2.3624870777130127, | |
| "learning_rate": 3.365173770688775e-05, | |
| "epoch": 0.83, | |
| "step": 55400 | |
| }, | |
| { | |
| "loss": 1.1395, | |
| "grad_norm": 1.5518018007278442, | |
| "learning_rate": 3.357664233576642e-05, | |
| "epoch": 0.83, | |
| "step": 55425 | |
| }, | |
| { | |
| "loss": 1.1172, | |
| "grad_norm": 1.820732593536377, | |
| "learning_rate": 3.35015469646451e-05, | |
| "epoch": 0.83, | |
| "step": 55450 | |
| }, | |
| { | |
| "loss": 1.1409, | |
| "grad_norm": 1.5625290870666504, | |
| "learning_rate": 3.342645159352378e-05, | |
| "epoch": 0.83, | |
| "step": 55475 | |
| }, | |
| { | |
| "loss": 1.1088, | |
| "grad_norm": 1.6013075113296509, | |
| "learning_rate": 3.335135622240245e-05, | |
| "epoch": 0.83, | |
| "step": 55500 | |
| }, | |
| { | |
| "loss": 1.1256, | |
| "grad_norm": 1.2917579412460327, | |
| "learning_rate": 3.3276260851281125e-05, | |
| "epoch": 0.83, | |
| "step": 55525 | |
| }, | |
| { | |
| "loss": 1.1221, | |
| "grad_norm": 2.3396828174591064, | |
| "learning_rate": 3.3201165480159804e-05, | |
| "epoch": 0.83, | |
| "step": 55550 | |
| }, | |
| { | |
| "loss": 1.1166, | |
| "grad_norm": 0.9828691482543945, | |
| "learning_rate": 3.312607010903848e-05, | |
| "epoch": 0.83, | |
| "step": 55575 | |
| }, | |
| { | |
| "loss": 1.0945, | |
| "grad_norm": 1.5887751579284668, | |
| "learning_rate": 3.3050974737917155e-05, | |
| "epoch": 0.84, | |
| "step": 55600 | |
| }, | |
| { | |
| "loss": 1.0711, | |
| "grad_norm": 1.1289055347442627, | |
| "learning_rate": 3.2975879366795834e-05, | |
| "epoch": 0.84, | |
| "step": 55625 | |
| }, | |
| { | |
| "loss": 1.0644, | |
| "grad_norm": 1.4812935590744019, | |
| "learning_rate": 3.2900783995674506e-05, | |
| "epoch": 0.84, | |
| "step": 55650 | |
| }, | |
| { | |
| "loss": 1.102, | |
| "grad_norm": 1.3823920488357544, | |
| "learning_rate": 3.2825688624553185e-05, | |
| "epoch": 0.84, | |
| "step": 55675 | |
| }, | |
| { | |
| "loss": 1.1011, | |
| "grad_norm": 1.7511022090911865, | |
| "learning_rate": 3.2750593253431863e-05, | |
| "epoch": 0.84, | |
| "step": 55700 | |
| }, | |
| { | |
| "loss": 1.183, | |
| "grad_norm": 1.9509655237197876, | |
| "learning_rate": 3.2675497882310536e-05, | |
| "epoch": 0.84, | |
| "step": 55725 | |
| }, | |
| { | |
| "loss": 1.0795, | |
| "grad_norm": 0.9376107454299927, | |
| "learning_rate": 3.260040251118921e-05, | |
| "epoch": 0.84, | |
| "step": 55750 | |
| }, | |
| { | |
| "loss": 1.1001, | |
| "grad_norm": 1.123742938041687, | |
| "learning_rate": 3.252530714006789e-05, | |
| "epoch": 0.84, | |
| "step": 55775 | |
| }, | |
| { | |
| "loss": 1.1292, | |
| "grad_norm": 2.6337194442749023, | |
| "learning_rate": 3.2450211768946565e-05, | |
| "epoch": 0.84, | |
| "step": 55800 | |
| }, | |
| { | |
| "loss": 1.0631, | |
| "grad_norm": 1.1294831037521362, | |
| "learning_rate": 3.237511639782524e-05, | |
| "epoch": 0.84, | |
| "step": 55825 | |
| }, | |
| { | |
| "loss": 1.1375, | |
| "grad_norm": 1.2995752096176147, | |
| "learning_rate": 3.2300021026703916e-05, | |
| "epoch": 0.84, | |
| "step": 55850 | |
| }, | |
| { | |
| "loss": 1.1308, | |
| "grad_norm": 4.79863166809082, | |
| "learning_rate": 3.2224925655582595e-05, | |
| "epoch": 0.84, | |
| "step": 55875 | |
| }, | |
| { | |
| "loss": 1.1005, | |
| "grad_norm": 1.031606674194336, | |
| "learning_rate": 3.214983028446127e-05, | |
| "epoch": 0.84, | |
| "step": 55900 | |
| }, | |
| { | |
| "loss": 1.011, | |
| "grad_norm": 1.0204112529754639, | |
| "learning_rate": 3.2074734913339946e-05, | |
| "epoch": 0.84, | |
| "step": 55925 | |
| }, | |
| { | |
| "loss": 1.1464, | |
| "grad_norm": 1.0727862119674683, | |
| "learning_rate": 3.199963954221862e-05, | |
| "epoch": 0.84, | |
| "step": 55950 | |
| }, | |
| { | |
| "loss": 1.1208, | |
| "grad_norm": 1.1318399906158447, | |
| "learning_rate": 3.192454417109729e-05, | |
| "epoch": 0.84, | |
| "step": 55975 | |
| }, | |
| { | |
| "loss": 1.1243, | |
| "grad_norm": 1.1461580991744995, | |
| "learning_rate": 3.1849448799975976e-05, | |
| "epoch": 0.84, | |
| "step": 56000 | |
| }, | |
| { | |
| "loss": 1.1367, | |
| "grad_norm": 1.8615853786468506, | |
| "learning_rate": 3.177435342885465e-05, | |
| "epoch": 0.84, | |
| "step": 56025 | |
| }, | |
| { | |
| "loss": 1.1489, | |
| "grad_norm": 1.4172084331512451, | |
| "learning_rate": 3.169925805773332e-05, | |
| "epoch": 0.84, | |
| "step": 56050 | |
| }, | |
| { | |
| "loss": 1.1035, | |
| "grad_norm": 1.299654245376587, | |
| "learning_rate": 3.1624162686612e-05, | |
| "epoch": 0.84, | |
| "step": 56075 | |
| }, | |
| { | |
| "loss": 1.1718, | |
| "grad_norm": 1.4679521322250366, | |
| "learning_rate": 3.154906731549068e-05, | |
| "epoch": 0.84, | |
| "step": 56100 | |
| }, | |
| { | |
| "loss": 1.11, | |
| "grad_norm": 2.0875778198242188, | |
| "learning_rate": 3.147397194436935e-05, | |
| "epoch": 0.84, | |
| "step": 56125 | |
| }, | |
| { | |
| "loss": 1.1843, | |
| "grad_norm": 0.9587807655334473, | |
| "learning_rate": 3.139887657324803e-05, | |
| "epoch": 0.84, | |
| "step": 56150 | |
| }, | |
| { | |
| "loss": 1.1362, | |
| "grad_norm": 2.253598690032959, | |
| "learning_rate": 3.13237812021267e-05, | |
| "epoch": 0.84, | |
| "step": 56175 | |
| }, | |
| { | |
| "loss": 1.0859, | |
| "grad_norm": 2.3193461894989014, | |
| "learning_rate": 3.124868583100538e-05, | |
| "epoch": 0.84, | |
| "step": 56200 | |
| }, | |
| { | |
| "loss": 1.1347, | |
| "grad_norm": 1.5526835918426514, | |
| "learning_rate": 3.117359045988406e-05, | |
| "epoch": 0.84, | |
| "step": 56225 | |
| }, | |
| { | |
| "loss": 1.1155, | |
| "grad_norm": 1.7318717241287231, | |
| "learning_rate": 3.109849508876273e-05, | |
| "epoch": 0.84, | |
| "step": 56250 | |
| }, | |
| { | |
| "loss": 1.1701, | |
| "grad_norm": 0.9637216925621033, | |
| "learning_rate": 3.10233997176414e-05, | |
| "epoch": 0.85, | |
| "step": 56275 | |
| }, | |
| { | |
| "loss": 1.0931, | |
| "grad_norm": 1.6898939609527588, | |
| "learning_rate": 3.094830434652008e-05, | |
| "epoch": 0.85, | |
| "step": 56300 | |
| }, | |
| { | |
| "loss": 1.1269, | |
| "grad_norm": 1.4921387434005737, | |
| "learning_rate": 3.087320897539876e-05, | |
| "epoch": 0.85, | |
| "step": 56325 | |
| }, | |
| { | |
| "loss": 1.098, | |
| "grad_norm": 1.4121395349502563, | |
| "learning_rate": 3.079811360427743e-05, | |
| "epoch": 0.85, | |
| "step": 56350 | |
| }, | |
| { | |
| "loss": 1.1518, | |
| "grad_norm": 1.144964575767517, | |
| "learning_rate": 3.072301823315611e-05, | |
| "epoch": 0.85, | |
| "step": 56375 | |
| }, | |
| { | |
| "loss": 1.1257, | |
| "grad_norm": 1.5664513111114502, | |
| "learning_rate": 3.0647922862034784e-05, | |
| "epoch": 0.85, | |
| "step": 56400 | |
| }, | |
| { | |
| "loss": 1.12, | |
| "grad_norm": 1.3426709175109863, | |
| "learning_rate": 3.057282749091346e-05, | |
| "epoch": 0.85, | |
| "step": 56425 | |
| }, | |
| { | |
| "loss": 1.0914, | |
| "grad_norm": 1.4727264642715454, | |
| "learning_rate": 3.0497732119792138e-05, | |
| "epoch": 0.85, | |
| "step": 56450 | |
| }, | |
| { | |
| "loss": 1.2281, | |
| "grad_norm": 3.242955207824707, | |
| "learning_rate": 3.0422636748670813e-05, | |
| "epoch": 0.85, | |
| "step": 56475 | |
| }, | |
| { | |
| "loss": 1.1146, | |
| "grad_norm": 1.4398702383041382, | |
| "learning_rate": 3.034754137754949e-05, | |
| "epoch": 0.85, | |
| "step": 56500 | |
| }, | |
| { | |
| "loss": 1.1887, | |
| "grad_norm": 1.533019781112671, | |
| "learning_rate": 3.0272446006428168e-05, | |
| "epoch": 0.85, | |
| "step": 56525 | |
| }, | |
| { | |
| "loss": 1.0688, | |
| "grad_norm": 1.3019578456878662, | |
| "learning_rate": 3.0197350635306843e-05, | |
| "epoch": 0.85, | |
| "step": 56550 | |
| }, | |
| { | |
| "loss": 1.0937, | |
| "grad_norm": 3.774083375930786, | |
| "learning_rate": 3.012225526418552e-05, | |
| "epoch": 0.85, | |
| "step": 56575 | |
| }, | |
| { | |
| "loss": 1.0797, | |
| "grad_norm": 1.535316824913025, | |
| "learning_rate": 3.004715989306419e-05, | |
| "epoch": 0.85, | |
| "step": 56600 | |
| }, | |
| { | |
| "loss": 1.1406, | |
| "grad_norm": 3.624013662338257, | |
| "learning_rate": 2.9972064521942866e-05, | |
| "epoch": 0.85, | |
| "step": 56625 | |
| }, | |
| { | |
| "loss": 1.0986, | |
| "grad_norm": 2.2039883136749268, | |
| "learning_rate": 2.9896969150821545e-05, | |
| "epoch": 0.85, | |
| "step": 56650 | |
| }, | |
| { | |
| "loss": 1.1318, | |
| "grad_norm": 1.0096391439437866, | |
| "learning_rate": 2.982187377970022e-05, | |
| "epoch": 0.85, | |
| "step": 56675 | |
| }, | |
| { | |
| "loss": 1.1223, | |
| "grad_norm": 1.8855111598968506, | |
| "learning_rate": 2.9746778408578896e-05, | |
| "epoch": 0.85, | |
| "step": 56700 | |
| }, | |
| { | |
| "loss": 1.0381, | |
| "grad_norm": 1.7449959516525269, | |
| "learning_rate": 2.967168303745757e-05, | |
| "epoch": 0.85, | |
| "step": 56725 | |
| }, | |
| { | |
| "loss": 1.1104, | |
| "grad_norm": 1.9731217622756958, | |
| "learning_rate": 2.959658766633625e-05, | |
| "epoch": 0.85, | |
| "step": 56750 | |
| }, | |
| { | |
| "loss": 1.1399, | |
| "grad_norm": 1.2654249668121338, | |
| "learning_rate": 2.9521492295214926e-05, | |
| "epoch": 0.85, | |
| "step": 56775 | |
| }, | |
| { | |
| "loss": 1.0705, | |
| "grad_norm": 2.2102811336517334, | |
| "learning_rate": 2.94463969240936e-05, | |
| "epoch": 0.85, | |
| "step": 56800 | |
| }, | |
| { | |
| "loss": 1.1073, | |
| "grad_norm": 1.3911298513412476, | |
| "learning_rate": 2.9371301552972274e-05, | |
| "epoch": 0.85, | |
| "step": 56825 | |
| }, | |
| { | |
| "loss": 1.132, | |
| "grad_norm": 1.2584389448165894, | |
| "learning_rate": 2.9296206181850956e-05, | |
| "epoch": 0.85, | |
| "step": 56850 | |
| }, | |
| { | |
| "loss": 1.0874, | |
| "grad_norm": 2.038180351257324, | |
| "learning_rate": 2.9221110810729628e-05, | |
| "epoch": 0.85, | |
| "step": 56875 | |
| }, | |
| { | |
| "loss": 1.1324, | |
| "grad_norm": 2.4636690616607666, | |
| "learning_rate": 2.9146015439608303e-05, | |
| "epoch": 0.85, | |
| "step": 56900 | |
| }, | |
| { | |
| "loss": 1.1063, | |
| "grad_norm": 1.3092725276947021, | |
| "learning_rate": 2.907092006848698e-05, | |
| "epoch": 0.85, | |
| "step": 56925 | |
| }, | |
| { | |
| "loss": 1.0472, | |
| "grad_norm": 1.3636903762817383, | |
| "learning_rate": 2.8995824697365654e-05, | |
| "epoch": 0.86, | |
| "step": 56950 | |
| }, | |
| { | |
| "loss": 1.1155, | |
| "grad_norm": 1.915647029876709, | |
| "learning_rate": 2.8920729326244333e-05, | |
| "epoch": 0.86, | |
| "step": 56975 | |
| }, | |
| { | |
| "loss": 1.141, | |
| "grad_norm": 1.1012380123138428, | |
| "learning_rate": 2.884563395512301e-05, | |
| "epoch": 0.86, | |
| "step": 57000 | |
| }, | |
| { | |
| "loss": 1.1004, | |
| "grad_norm": 1.4259084463119507, | |
| "learning_rate": 2.8770538584001684e-05, | |
| "epoch": 0.86, | |
| "step": 57025 | |
| }, | |
| { | |
| "loss": 1.0786, | |
| "grad_norm": 1.411359190940857, | |
| "learning_rate": 2.8695443212880356e-05, | |
| "epoch": 0.86, | |
| "step": 57050 | |
| }, | |
| { | |
| "loss": 1.1746, | |
| "grad_norm": 1.6087229251861572, | |
| "learning_rate": 2.862034784175904e-05, | |
| "epoch": 0.86, | |
| "step": 57075 | |
| }, | |
| { | |
| "loss": 1.1506, | |
| "grad_norm": 1.2207622528076172, | |
| "learning_rate": 2.854525247063771e-05, | |
| "epoch": 0.86, | |
| "step": 57100 | |
| }, | |
| { | |
| "loss": 1.177, | |
| "grad_norm": 1.542277455329895, | |
| "learning_rate": 2.8470157099516386e-05, | |
| "epoch": 0.86, | |
| "step": 57125 | |
| }, | |
| { | |
| "loss": 1.1666, | |
| "grad_norm": 1.7982580661773682, | |
| "learning_rate": 2.839506172839506e-05, | |
| "epoch": 0.86, | |
| "step": 57150 | |
| }, | |
| { | |
| "loss": 1.1294, | |
| "grad_norm": 1.2220053672790527, | |
| "learning_rate": 2.831996635727374e-05, | |
| "epoch": 0.86, | |
| "step": 57175 | |
| }, | |
| { | |
| "loss": 1.1134, | |
| "grad_norm": 2.173220157623291, | |
| "learning_rate": 2.8244870986152416e-05, | |
| "epoch": 0.86, | |
| "step": 57200 | |
| }, | |
| { | |
| "loss": 1.1039, | |
| "grad_norm": 1.5563225746154785, | |
| "learning_rate": 2.816977561503109e-05, | |
| "epoch": 0.86, | |
| "step": 57225 | |
| }, | |
| { | |
| "loss": 1.1248, | |
| "grad_norm": 1.477427363395691, | |
| "learning_rate": 2.8094680243909767e-05, | |
| "epoch": 0.86, | |
| "step": 57250 | |
| }, | |
| { | |
| "loss": 1.106, | |
| "grad_norm": 1.374334692955017, | |
| "learning_rate": 2.801958487278844e-05, | |
| "epoch": 0.86, | |
| "step": 57275 | |
| }, | |
| { | |
| "loss": 1.1058, | |
| "grad_norm": 1.437056541442871, | |
| "learning_rate": 2.794448950166712e-05, | |
| "epoch": 0.86, | |
| "step": 57300 | |
| }, | |
| { | |
| "loss": 1.1278, | |
| "grad_norm": 1.2822084426879883, | |
| "learning_rate": 2.7869394130545797e-05, | |
| "epoch": 0.86, | |
| "step": 57325 | |
| }, | |
| { | |
| "loss": 1.1003, | |
| "grad_norm": 0.9716039299964905, | |
| "learning_rate": 2.779429875942447e-05, | |
| "epoch": 0.86, | |
| "step": 57350 | |
| }, | |
| { | |
| "loss": 1.0853, | |
| "grad_norm": 1.4873470067977905, | |
| "learning_rate": 2.7719203388303144e-05, | |
| "epoch": 0.86, | |
| "step": 57375 | |
| }, | |
| { | |
| "loss": 1.0961, | |
| "grad_norm": 1.8672046661376953, | |
| "learning_rate": 2.7644108017181823e-05, | |
| "epoch": 0.86, | |
| "step": 57400 | |
| }, | |
| { | |
| "loss": 1.1141, | |
| "grad_norm": 1.4299520254135132, | |
| "learning_rate": 2.75690126460605e-05, | |
| "epoch": 0.86, | |
| "step": 57425 | |
| }, | |
| { | |
| "loss": 1.1041, | |
| "grad_norm": 2.402892827987671, | |
| "learning_rate": 2.7493917274939174e-05, | |
| "epoch": 0.86, | |
| "step": 57450 | |
| }, | |
| { | |
| "loss": 1.1231, | |
| "grad_norm": 1.2294812202453613, | |
| "learning_rate": 2.741882190381785e-05, | |
| "epoch": 0.86, | |
| "step": 57475 | |
| }, | |
| { | |
| "loss": 1.127, | |
| "grad_norm": 1.7184091806411743, | |
| "learning_rate": 2.734372653269653e-05, | |
| "epoch": 0.86, | |
| "step": 57500 | |
| }, | |
| { | |
| "loss": 1.1925, | |
| "grad_norm": 1.3573827743530273, | |
| "learning_rate": 2.7268631161575204e-05, | |
| "epoch": 0.86, | |
| "step": 57525 | |
| }, | |
| { | |
| "loss": 1.1482, | |
| "grad_norm": 1.7570611238479614, | |
| "learning_rate": 2.719353579045388e-05, | |
| "epoch": 0.86, | |
| "step": 57550 | |
| }, | |
| { | |
| "loss": 1.1096, | |
| "grad_norm": 1.4989982843399048, | |
| "learning_rate": 2.711844041933255e-05, | |
| "epoch": 0.86, | |
| "step": 57575 | |
| }, | |
| { | |
| "loss": 1.0666, | |
| "grad_norm": 1.59767746925354, | |
| "learning_rate": 2.7043345048211227e-05, | |
| "epoch": 0.87, | |
| "step": 57600 | |
| }, | |
| { | |
| "loss": 1.125, | |
| "grad_norm": 1.3916709423065186, | |
| "learning_rate": 2.6968249677089906e-05, | |
| "epoch": 0.87, | |
| "step": 57625 | |
| }, | |
| { | |
| "loss": 1.038, | |
| "grad_norm": 1.6316527128219604, | |
| "learning_rate": 2.689315430596858e-05, | |
| "epoch": 0.87, | |
| "step": 57650 | |
| }, | |
| { | |
| "loss": 1.0536, | |
| "grad_norm": 1.4137283563613892, | |
| "learning_rate": 2.6818058934847257e-05, | |
| "epoch": 0.87, | |
| "step": 57675 | |
| }, | |
| { | |
| "loss": 1.0917, | |
| "grad_norm": 1.9997875690460205, | |
| "learning_rate": 2.6742963563725932e-05, | |
| "epoch": 0.87, | |
| "step": 57700 | |
| }, | |
| { | |
| "loss": 1.0194, | |
| "grad_norm": 1.3425413370132446, | |
| "learning_rate": 2.666786819260461e-05, | |
| "epoch": 0.87, | |
| "step": 57725 | |
| }, | |
| { | |
| "loss": 1.0669, | |
| "grad_norm": 0.9919766783714294, | |
| "learning_rate": 2.6592772821483287e-05, | |
| "epoch": 0.87, | |
| "step": 57750 | |
| }, | |
| { | |
| "loss": 1.15, | |
| "grad_norm": 1.5039972066879272, | |
| "learning_rate": 2.6517677450361962e-05, | |
| "epoch": 0.87, | |
| "step": 57775 | |
| }, | |
| { | |
| "loss": 1.0898, | |
| "grad_norm": 2.429229974746704, | |
| "learning_rate": 2.6442582079240634e-05, | |
| "epoch": 0.87, | |
| "step": 57800 | |
| }, | |
| { | |
| "loss": 1.1132, | |
| "grad_norm": 1.3402752876281738, | |
| "learning_rate": 2.6367486708119316e-05, | |
| "epoch": 0.87, | |
| "step": 57825 | |
| }, | |
| { | |
| "loss": 1.0645, | |
| "grad_norm": 1.041297435760498, | |
| "learning_rate": 2.629239133699799e-05, | |
| "epoch": 0.87, | |
| "step": 57850 | |
| }, | |
| { | |
| "loss": 1.105, | |
| "grad_norm": 1.0299885272979736, | |
| "learning_rate": 2.6217295965876664e-05, | |
| "epoch": 0.87, | |
| "step": 57875 | |
| }, | |
| { | |
| "loss": 1.1492, | |
| "grad_norm": 1.3811683654785156, | |
| "learning_rate": 2.614220059475534e-05, | |
| "epoch": 0.87, | |
| "step": 57900 | |
| }, | |
| { | |
| "loss": 1.1381, | |
| "grad_norm": 1.3449524641036987, | |
| "learning_rate": 2.6067105223634015e-05, | |
| "epoch": 0.87, | |
| "step": 57925 | |
| }, | |
| { | |
| "loss": 1.1077, | |
| "grad_norm": 1.1133577823638916, | |
| "learning_rate": 2.5995013667357543e-05, | |
| "epoch": 0.87, | |
| "step": 57950 | |
| }, | |
| { | |
| "loss": 1.1155, | |
| "grad_norm": 1.2379744052886963, | |
| "learning_rate": 2.5919918296236222e-05, | |
| "epoch": 0.87, | |
| "step": 57975 | |
| }, | |
| { | |
| "loss": 1.0956, | |
| "grad_norm": 1.4516429901123047, | |
| "learning_rate": 2.5844822925114898e-05, | |
| "epoch": 0.87, | |
| "step": 58000 | |
| }, | |
| { | |
| "loss": 1.157, | |
| "grad_norm": 2.1705074310302734, | |
| "learning_rate": 2.5769727553993573e-05, | |
| "epoch": 0.87, | |
| "step": 58025 | |
| }, | |
| { | |
| "loss": 1.1116, | |
| "grad_norm": 1.8582936525344849, | |
| "learning_rate": 2.5694632182872245e-05, | |
| "epoch": 0.87, | |
| "step": 58050 | |
| }, | |
| { | |
| "loss": 1.0901, | |
| "grad_norm": 1.2407045364379883, | |
| "learning_rate": 2.5619536811750928e-05, | |
| "epoch": 0.87, | |
| "step": 58075 | |
| }, | |
| { | |
| "loss": 1.0979, | |
| "grad_norm": 1.4852651357650757, | |
| "learning_rate": 2.5544441440629603e-05, | |
| "epoch": 0.87, | |
| "step": 58100 | |
| }, | |
| { | |
| "loss": 1.1655, | |
| "grad_norm": 1.1345808506011963, | |
| "learning_rate": 2.5469346069508275e-05, | |
| "epoch": 0.87, | |
| "step": 58125 | |
| }, | |
| { | |
| "loss": 1.1008, | |
| "grad_norm": 1.741289734840393, | |
| "learning_rate": 2.539425069838695e-05, | |
| "epoch": 0.87, | |
| "step": 58150 | |
| }, | |
| { | |
| "loss": 1.1831, | |
| "grad_norm": 1.26760995388031, | |
| "learning_rate": 2.532215914211048e-05, | |
| "epoch": 0.87, | |
| "step": 58175 | |
| }, | |
| { | |
| "loss": 1.0911, | |
| "grad_norm": 1.9289544820785522, | |
| "learning_rate": 2.524706377098916e-05, | |
| "epoch": 0.87, | |
| "step": 58200 | |
| }, | |
| { | |
| "loss": 1.0442, | |
| "grad_norm": 1.9321314096450806, | |
| "learning_rate": 2.5171968399867834e-05, | |
| "epoch": 0.87, | |
| "step": 58225 | |
| }, | |
| { | |
| "loss": 1.1097, | |
| "grad_norm": 1.1289350986480713, | |
| "learning_rate": 2.509687302874651e-05, | |
| "epoch": 0.87, | |
| "step": 58250 | |
| }, | |
| { | |
| "loss": 1.0505, | |
| "grad_norm": 1.3914735317230225, | |
| "learning_rate": 2.5021777657625184e-05, | |
| "epoch": 0.88, | |
| "step": 58275 | |
| }, | |
| { | |
| "loss": 1.0804, | |
| "grad_norm": 1.2914477586746216, | |
| "learning_rate": 2.494668228650386e-05, | |
| "epoch": 0.88, | |
| "step": 58300 | |
| }, | |
| { | |
| "loss": 1.064, | |
| "grad_norm": 1.1069772243499756, | |
| "learning_rate": 2.4871586915382535e-05, | |
| "epoch": 0.88, | |
| "step": 58325 | |
| }, | |
| { | |
| "loss": 1.0294, | |
| "grad_norm": 1.7709311246871948, | |
| "learning_rate": 2.4796491544261214e-05, | |
| "epoch": 0.88, | |
| "step": 58350 | |
| }, | |
| { | |
| "loss": 1.0995, | |
| "grad_norm": 1.3731812238693237, | |
| "learning_rate": 2.4721396173139886e-05, | |
| "epoch": 0.88, | |
| "step": 58375 | |
| }, | |
| { | |
| "loss": 1.0574, | |
| "grad_norm": 1.3423503637313843, | |
| "learning_rate": 2.4646300802018565e-05, | |
| "epoch": 0.88, | |
| "step": 58400 | |
| }, | |
| { | |
| "loss": 1.1297, | |
| "grad_norm": 1.5664671659469604, | |
| "learning_rate": 2.457120543089724e-05, | |
| "epoch": 0.88, | |
| "step": 58425 | |
| }, | |
| { | |
| "loss": 1.1686, | |
| "grad_norm": 1.7989689111709595, | |
| "learning_rate": 2.4496110059775916e-05, | |
| "epoch": 0.88, | |
| "step": 58450 | |
| }, | |
| { | |
| "loss": 1.063, | |
| "grad_norm": 2.50423526763916, | |
| "learning_rate": 2.442101468865459e-05, | |
| "epoch": 0.88, | |
| "step": 58475 | |
| }, | |
| { | |
| "loss": 1.1528, | |
| "grad_norm": 2.081894636154175, | |
| "learning_rate": 2.434591931753327e-05, | |
| "epoch": 0.88, | |
| "step": 58500 | |
| }, | |
| { | |
| "loss": 1.0845, | |
| "grad_norm": 1.7260534763336182, | |
| "learning_rate": 2.4270823946411943e-05, | |
| "epoch": 0.88, | |
| "step": 58525 | |
| }, | |
| { | |
| "loss": 1.1555, | |
| "grad_norm": 1.6785259246826172, | |
| "learning_rate": 2.419572857529062e-05, | |
| "epoch": 0.88, | |
| "step": 58550 | |
| }, | |
| { | |
| "loss": 1.1315, | |
| "grad_norm": 1.5150628089904785, | |
| "learning_rate": 2.4120633204169297e-05, | |
| "epoch": 0.88, | |
| "step": 58575 | |
| }, | |
| { | |
| "loss": 1.1247, | |
| "grad_norm": 1.3232154846191406, | |
| "learning_rate": 2.4045537833047972e-05, | |
| "epoch": 0.88, | |
| "step": 58600 | |
| }, | |
| { | |
| "loss": 1.1638, | |
| "grad_norm": 1.435685157775879, | |
| "learning_rate": 2.3970442461926648e-05, | |
| "epoch": 0.88, | |
| "step": 58625 | |
| }, | |
| { | |
| "loss": 1.1466, | |
| "grad_norm": 1.562098741531372, | |
| "learning_rate": 2.3895347090805323e-05, | |
| "epoch": 0.88, | |
| "step": 58650 | |
| }, | |
| { | |
| "loss": 1.0823, | |
| "grad_norm": 1.6774852275848389, | |
| "learning_rate": 2.3820251719684e-05, | |
| "epoch": 0.88, | |
| "step": 58675 | |
| }, | |
| { | |
| "loss": 1.1152, | |
| "grad_norm": 2.8691372871398926, | |
| "learning_rate": 2.3745156348562674e-05, | |
| "epoch": 0.88, | |
| "step": 58700 | |
| }, | |
| { | |
| "loss": 1.0675, | |
| "grad_norm": 1.2133371829986572, | |
| "learning_rate": 2.3670060977441353e-05, | |
| "epoch": 0.88, | |
| "step": 58725 | |
| }, | |
| { | |
| "loss": 1.1358, | |
| "grad_norm": 1.280999779701233, | |
| "learning_rate": 2.3594965606320025e-05, | |
| "epoch": 0.88, | |
| "step": 58750 | |
| }, | |
| { | |
| "loss": 1.073, | |
| "grad_norm": 2.144066333770752, | |
| "learning_rate": 2.3519870235198704e-05, | |
| "epoch": 0.88, | |
| "step": 58775 | |
| }, | |
| { | |
| "loss": 1.1203, | |
| "grad_norm": 1.4125479459762573, | |
| "learning_rate": 2.344477486407738e-05, | |
| "epoch": 0.88, | |
| "step": 58800 | |
| }, | |
| { | |
| "loss": 1.1467, | |
| "grad_norm": 1.402156949043274, | |
| "learning_rate": 2.3369679492956055e-05, | |
| "epoch": 0.88, | |
| "step": 58825 | |
| }, | |
| { | |
| "loss": 1.1691, | |
| "grad_norm": 1.3000797033309937, | |
| "learning_rate": 2.329458412183473e-05, | |
| "epoch": 0.88, | |
| "step": 58850 | |
| }, | |
| { | |
| "loss": 1.1105, | |
| "grad_norm": 1.9694422483444214, | |
| "learning_rate": 2.321948875071341e-05, | |
| "epoch": 0.88, | |
| "step": 58875 | |
| }, | |
| { | |
| "loss": 1.098, | |
| "grad_norm": 1.4404619932174683, | |
| "learning_rate": 2.314439337959208e-05, | |
| "epoch": 0.88, | |
| "step": 58900 | |
| }, | |
| { | |
| "loss": 1.0979, | |
| "grad_norm": 2.1054556369781494, | |
| "learning_rate": 2.3069298008470757e-05, | |
| "epoch": 0.88, | |
| "step": 58925 | |
| }, | |
| { | |
| "loss": 1.1015, | |
| "grad_norm": 1.2658005952835083, | |
| "learning_rate": 2.2994202637349436e-05, | |
| "epoch": 0.89, | |
| "step": 58950 | |
| }, | |
| { | |
| "loss": 1.1349, | |
| "grad_norm": 1.4039870500564575, | |
| "learning_rate": 2.291910726622811e-05, | |
| "epoch": 0.89, | |
| "step": 58975 | |
| }, | |
| { | |
| "loss": 1.0923, | |
| "grad_norm": 1.5480154752731323, | |
| "learning_rate": 2.2844011895106787e-05, | |
| "epoch": 0.89, | |
| "step": 59000 | |
| }, | |
| { | |
| "loss": 1.1652, | |
| "grad_norm": 1.9261832237243652, | |
| "learning_rate": 2.2768916523985462e-05, | |
| "epoch": 0.89, | |
| "step": 59025 | |
| }, | |
| { | |
| "loss": 1.0539, | |
| "grad_norm": 1.2835638523101807, | |
| "learning_rate": 2.2693821152864138e-05, | |
| "epoch": 0.89, | |
| "step": 59050 | |
| }, | |
| { | |
| "loss": 1.0895, | |
| "grad_norm": 1.7522798776626587, | |
| "learning_rate": 2.2618725781742813e-05, | |
| "epoch": 0.89, | |
| "step": 59075 | |
| }, | |
| { | |
| "loss": 1.0988, | |
| "grad_norm": 1.2995007038116455, | |
| "learning_rate": 2.2543630410621492e-05, | |
| "epoch": 0.89, | |
| "step": 59100 | |
| }, | |
| { | |
| "loss": 1.0471, | |
| "grad_norm": 1.5621485710144043, | |
| "learning_rate": 2.2468535039500164e-05, | |
| "epoch": 0.89, | |
| "step": 59125 | |
| }, | |
| { | |
| "loss": 1.1299, | |
| "grad_norm": 3.184175968170166, | |
| "learning_rate": 2.2393439668378843e-05, | |
| "epoch": 0.89, | |
| "step": 59150 | |
| }, | |
| { | |
| "loss": 1.201, | |
| "grad_norm": 1.7400543689727783, | |
| "learning_rate": 2.231834429725752e-05, | |
| "epoch": 0.89, | |
| "step": 59175 | |
| }, | |
| { | |
| "loss": 1.148, | |
| "grad_norm": 1.880234956741333, | |
| "learning_rate": 2.2243248926136194e-05, | |
| "epoch": 0.89, | |
| "step": 59200 | |
| }, | |
| { | |
| "loss": 1.0385, | |
| "grad_norm": 1.2461950778961182, | |
| "learning_rate": 2.216815355501487e-05, | |
| "epoch": 0.89, | |
| "step": 59225 | |
| }, | |
| { | |
| "loss": 1.1849, | |
| "grad_norm": 2.8920862674713135, | |
| "learning_rate": 2.2093058183893545e-05, | |
| "epoch": 0.89, | |
| "step": 59250 | |
| }, | |
| { | |
| "loss": 1.0813, | |
| "grad_norm": 1.3439332246780396, | |
| "learning_rate": 2.201796281277222e-05, | |
| "epoch": 0.89, | |
| "step": 59275 | |
| }, | |
| { | |
| "loss": 1.0912, | |
| "grad_norm": 1.2441843748092651, | |
| "learning_rate": 2.1942867441650896e-05, | |
| "epoch": 0.89, | |
| "step": 59300 | |
| }, | |
| { | |
| "loss": 1.121, | |
| "grad_norm": 1.5612194538116455, | |
| "learning_rate": 2.1867772070529575e-05, | |
| "epoch": 0.89, | |
| "step": 59325 | |
| }, | |
| { | |
| "loss": 1.1187, | |
| "grad_norm": 2.292187213897705, | |
| "learning_rate": 2.179267669940825e-05, | |
| "epoch": 0.89, | |
| "step": 59350 | |
| }, | |
| { | |
| "loss": 1.098, | |
| "grad_norm": 1.3217053413391113, | |
| "learning_rate": 2.1717581328286926e-05, | |
| "epoch": 0.89, | |
| "step": 59375 | |
| }, | |
| { | |
| "loss": 1.07, | |
| "grad_norm": 2.108124017715454, | |
| "learning_rate": 2.16424859571656e-05, | |
| "epoch": 0.89, | |
| "step": 59400 | |
| }, | |
| { | |
| "loss": 1.1967, | |
| "grad_norm": 1.461854100227356, | |
| "learning_rate": 2.1567390586044277e-05, | |
| "epoch": 0.89, | |
| "step": 59425 | |
| }, | |
| { | |
| "loss": 1.0829, | |
| "grad_norm": 2.4140448570251465, | |
| "learning_rate": 2.1492295214922952e-05, | |
| "epoch": 0.89, | |
| "step": 59450 | |
| }, | |
| { | |
| "loss": 1.084, | |
| "grad_norm": 1.3833210468292236, | |
| "learning_rate": 2.141719984380163e-05, | |
| "epoch": 0.89, | |
| "step": 59475 | |
| }, | |
| { | |
| "loss": 1.0312, | |
| "grad_norm": 1.463707447052002, | |
| "learning_rate": 2.1342104472680303e-05, | |
| "epoch": 0.89, | |
| "step": 59500 | |
| }, | |
| { | |
| "loss": 1.0947, | |
| "grad_norm": 1.0634888410568237, | |
| "learning_rate": 2.1267009101558982e-05, | |
| "epoch": 0.89, | |
| "step": 59525 | |
| }, | |
| { | |
| "loss": 1.1448, | |
| "grad_norm": 1.6115715503692627, | |
| "learning_rate": 2.1191913730437658e-05, | |
| "epoch": 0.89, | |
| "step": 59550 | |
| }, | |
| { | |
| "loss": 1.1279, | |
| "grad_norm": 2.023573398590088, | |
| "learning_rate": 2.1116818359316333e-05, | |
| "epoch": 0.89, | |
| "step": 59575 | |
| }, | |
| { | |
| "loss": 1.0725, | |
| "grad_norm": 1.3353021144866943, | |
| "learning_rate": 2.104172298819501e-05, | |
| "epoch": 0.9, | |
| "step": 59600 | |
| }, | |
| { | |
| "loss": 1.1597, | |
| "grad_norm": 2.067376136779785, | |
| "learning_rate": 2.0966627617073684e-05, | |
| "epoch": 0.9, | |
| "step": 59625 | |
| }, | |
| { | |
| "loss": 1.0665, | |
| "grad_norm": 1.4394888877868652, | |
| "learning_rate": 2.089153224595236e-05, | |
| "epoch": 0.9, | |
| "step": 59650 | |
| }, | |
| { | |
| "loss": 1.1449, | |
| "grad_norm": 1.1642546653747559, | |
| "learning_rate": 2.0816436874831035e-05, | |
| "epoch": 0.9, | |
| "step": 59675 | |
| }, | |
| { | |
| "loss": 1.0973, | |
| "grad_norm": 1.6994637250900269, | |
| "learning_rate": 2.0741341503709714e-05, | |
| "epoch": 0.9, | |
| "step": 59700 | |
| }, | |
| { | |
| "loss": 1.1076, | |
| "grad_norm": 2.0998518466949463, | |
| "learning_rate": 2.066624613258839e-05, | |
| "epoch": 0.9, | |
| "step": 59725 | |
| }, | |
| { | |
| "loss": 1.1332, | |
| "grad_norm": 1.608519196510315, | |
| "learning_rate": 2.0591150761467065e-05, | |
| "epoch": 0.9, | |
| "step": 59750 | |
| }, | |
| { | |
| "loss": 1.0639, | |
| "grad_norm": 5.120492935180664, | |
| "learning_rate": 2.051605539034574e-05, | |
| "epoch": 0.9, | |
| "step": 59775 | |
| }, | |
| { | |
| "loss": 1.1295, | |
| "grad_norm": 1.2980087995529175, | |
| "learning_rate": 2.0440960019224416e-05, | |
| "epoch": 0.9, | |
| "step": 59800 | |
| }, | |
| { | |
| "loss": 1.0998, | |
| "grad_norm": 1.518433928489685, | |
| "learning_rate": 2.036586464810309e-05, | |
| "epoch": 0.9, | |
| "step": 59825 | |
| }, | |
| { | |
| "loss": 1.0911, | |
| "grad_norm": 1.1310094594955444, | |
| "learning_rate": 2.0290769276981767e-05, | |
| "epoch": 0.9, | |
| "step": 59850 | |
| }, | |
| { | |
| "loss": 1.1567, | |
| "grad_norm": 0.9931915998458862, | |
| "learning_rate": 2.0215673905860442e-05, | |
| "epoch": 0.9, | |
| "step": 59875 | |
| }, | |
| { | |
| "loss": 1.1115, | |
| "grad_norm": 2.011012077331543, | |
| "learning_rate": 2.0140578534739118e-05, | |
| "epoch": 0.9, | |
| "step": 59900 | |
| }, | |
| { | |
| "loss": 1.119, | |
| "grad_norm": 1.6782035827636719, | |
| "learning_rate": 2.0065483163617797e-05, | |
| "epoch": 0.9, | |
| "step": 59925 | |
| }, | |
| { | |
| "loss": 1.0836, | |
| "grad_norm": 1.6010968685150146, | |
| "learning_rate": 1.9990387792496472e-05, | |
| "epoch": 0.9, | |
| "step": 59950 | |
| }, | |
| { | |
| "loss": 1.0171, | |
| "grad_norm": 1.8368406295776367, | |
| "learning_rate": 1.9915292421375148e-05, | |
| "epoch": 0.9, | |
| "step": 59975 | |
| }, | |
| { | |
| "loss": 1.1612, | |
| "grad_norm": 1.6433417797088623, | |
| "learning_rate": 1.9840197050253823e-05, | |
| "epoch": 0.9, | |
| "step": 60000 | |
| }, | |
| { | |
| "loss": 1.1051, | |
| "grad_norm": 1.0590778589248657, | |
| "learning_rate": 1.97651016791325e-05, | |
| "epoch": 0.9, | |
| "step": 60025 | |
| }, | |
| { | |
| "loss": 1.1467, | |
| "grad_norm": 2.4711523056030273, | |
| "learning_rate": 1.9690006308011174e-05, | |
| "epoch": 0.9, | |
| "step": 60050 | |
| }, | |
| { | |
| "loss": 1.1249, | |
| "grad_norm": 1.817872166633606, | |
| "learning_rate": 1.9614910936889853e-05, | |
| "epoch": 0.9, | |
| "step": 60075 | |
| }, | |
| { | |
| "loss": 1.1509, | |
| "grad_norm": 1.9354240894317627, | |
| "learning_rate": 1.953981556576853e-05, | |
| "epoch": 0.9, | |
| "step": 60100 | |
| }, | |
| { | |
| "loss": 1.1021, | |
| "grad_norm": 2.382876396179199, | |
| "learning_rate": 1.9464720194647204e-05, | |
| "epoch": 0.9, | |
| "step": 60125 | |
| }, | |
| { | |
| "loss": 1.0521, | |
| "grad_norm": 1.6886651515960693, | |
| "learning_rate": 1.938962482352588e-05, | |
| "epoch": 0.9, | |
| "step": 60150 | |
| }, | |
| { | |
| "loss": 1.0828, | |
| "grad_norm": 1.0179933309555054, | |
| "learning_rate": 1.9314529452404555e-05, | |
| "epoch": 0.9, | |
| "step": 60175 | |
| }, | |
| { | |
| "loss": 1.0767, | |
| "grad_norm": 1.041438102722168, | |
| "learning_rate": 1.923943408128323e-05, | |
| "epoch": 0.9, | |
| "step": 60200 | |
| }, | |
| { | |
| "loss": 1.0739, | |
| "grad_norm": 1.1048403978347778, | |
| "learning_rate": 1.9164338710161906e-05, | |
| "epoch": 0.9, | |
| "step": 60225 | |
| }, | |
| { | |
| "loss": 1.1025, | |
| "grad_norm": 1.113214373588562, | |
| "learning_rate": 1.908924333904058e-05, | |
| "epoch": 0.9, | |
| "step": 60250 | |
| }, | |
| { | |
| "loss": 1.1523, | |
| "grad_norm": 2.7546420097351074, | |
| "learning_rate": 1.9014147967919257e-05, | |
| "epoch": 0.91, | |
| "step": 60275 | |
| }, | |
| { | |
| "loss": 1.043, | |
| "grad_norm": 1.3055835962295532, | |
| "learning_rate": 1.8939052596797936e-05, | |
| "epoch": 0.91, | |
| "step": 60300 | |
| }, | |
| { | |
| "loss": 1.0768, | |
| "grad_norm": 1.9900767803192139, | |
| "learning_rate": 1.8866961040521464e-05, | |
| "epoch": 0.91, | |
| "step": 60325 | |
| }, | |
| { | |
| "loss": 1.1571, | |
| "grad_norm": 1.605908751487732, | |
| "learning_rate": 1.879186566940014e-05, | |
| "epoch": 0.91, | |
| "step": 60350 | |
| }, | |
| { | |
| "loss": 1.1399, | |
| "grad_norm": 1.9245578050613403, | |
| "learning_rate": 1.8716770298278815e-05, | |
| "epoch": 0.91, | |
| "step": 60375 | |
| }, | |
| { | |
| "loss": 1.0959, | |
| "grad_norm": 1.897222638130188, | |
| "learning_rate": 1.864167492715749e-05, | |
| "epoch": 0.91, | |
| "step": 60400 | |
| }, | |
| { | |
| "loss": 1.1132, | |
| "grad_norm": 2.4311060905456543, | |
| "learning_rate": 1.856657955603617e-05, | |
| "epoch": 0.91, | |
| "step": 60425 | |
| }, | |
| { | |
| "loss": 1.1102, | |
| "grad_norm": 1.378459095954895, | |
| "learning_rate": 1.849148418491484e-05, | |
| "epoch": 0.91, | |
| "step": 60450 | |
| }, | |
| { | |
| "loss": 1.0765, | |
| "grad_norm": 2.5490572452545166, | |
| "learning_rate": 1.841638881379352e-05, | |
| "epoch": 0.91, | |
| "step": 60475 | |
| }, | |
| { | |
| "loss": 1.1314, | |
| "grad_norm": 1.2700508832931519, | |
| "learning_rate": 1.8341293442672196e-05, | |
| "epoch": 0.91, | |
| "step": 60500 | |
| }, | |
| { | |
| "loss": 1.1452, | |
| "grad_norm": 1.636888027191162, | |
| "learning_rate": 1.826619807155087e-05, | |
| "epoch": 0.91, | |
| "step": 60525 | |
| }, | |
| { | |
| "loss": 1.0367, | |
| "grad_norm": 1.4893200397491455, | |
| "learning_rate": 1.8191102700429547e-05, | |
| "epoch": 0.91, | |
| "step": 60550 | |
| }, | |
| { | |
| "loss": 1.0222, | |
| "grad_norm": 0.9594138860702515, | |
| "learning_rate": 1.8116007329308222e-05, | |
| "epoch": 0.91, | |
| "step": 60575 | |
| }, | |
| { | |
| "loss": 1.115, | |
| "grad_norm": 2.1128294467926025, | |
| "learning_rate": 1.8040911958186898e-05, | |
| "epoch": 0.91, | |
| "step": 60600 | |
| }, | |
| { | |
| "loss": 1.1439, | |
| "grad_norm": 1.5294193029403687, | |
| "learning_rate": 1.7965816587065573e-05, | |
| "epoch": 0.91, | |
| "step": 60625 | |
| }, | |
| { | |
| "loss": 1.121, | |
| "grad_norm": 1.5716066360473633, | |
| "learning_rate": 1.7890721215944252e-05, | |
| "epoch": 0.91, | |
| "step": 60650 | |
| }, | |
| { | |
| "loss": 1.0839, | |
| "grad_norm": 1.2781248092651367, | |
| "learning_rate": 1.7815625844822924e-05, | |
| "epoch": 0.91, | |
| "step": 60675 | |
| }, | |
| { | |
| "loss": 1.1282, | |
| "grad_norm": 1.1077611446380615, | |
| "learning_rate": 1.7740530473701603e-05, | |
| "epoch": 0.91, | |
| "step": 60700 | |
| }, | |
| { | |
| "loss": 1.1352, | |
| "grad_norm": 1.1450996398925781, | |
| "learning_rate": 1.766543510258028e-05, | |
| "epoch": 0.91, | |
| "step": 60725 | |
| }, | |
| { | |
| "loss": 1.0536, | |
| "grad_norm": 1.3349822759628296, | |
| "learning_rate": 1.7590339731458954e-05, | |
| "epoch": 0.91, | |
| "step": 60750 | |
| }, | |
| { | |
| "loss": 1.13, | |
| "grad_norm": 1.6306883096694946, | |
| "learning_rate": 1.751524436033763e-05, | |
| "epoch": 0.91, | |
| "step": 60775 | |
| }, | |
| { | |
| "loss": 1.1103, | |
| "grad_norm": 1.7724149227142334, | |
| "learning_rate": 1.744014898921631e-05, | |
| "epoch": 0.91, | |
| "step": 60800 | |
| }, | |
| { | |
| "loss": 1.0242, | |
| "grad_norm": 1.5324548482894897, | |
| "learning_rate": 1.736505361809498e-05, | |
| "epoch": 0.91, | |
| "step": 60825 | |
| }, | |
| { | |
| "loss": 1.0809, | |
| "grad_norm": 0.8965089917182922, | |
| "learning_rate": 1.728995824697366e-05, | |
| "epoch": 0.91, | |
| "step": 60850 | |
| }, | |
| { | |
| "loss": 1.1012, | |
| "grad_norm": 1.551774263381958, | |
| "learning_rate": 1.7214862875852335e-05, | |
| "epoch": 0.91, | |
| "step": 60875 | |
| }, | |
| { | |
| "loss": 1.1264, | |
| "grad_norm": 1.070957064628601, | |
| "learning_rate": 1.7139767504731007e-05, | |
| "epoch": 0.91, | |
| "step": 60900 | |
| }, | |
| { | |
| "loss": 1.1098, | |
| "grad_norm": 1.5298128128051758, | |
| "learning_rate": 1.7064672133609686e-05, | |
| "epoch": 0.92, | |
| "step": 60925 | |
| }, | |
| { | |
| "loss": 1.089, | |
| "grad_norm": 1.6738872528076172, | |
| "learning_rate": 1.698957676248836e-05, | |
| "epoch": 0.92, | |
| "step": 60950 | |
| }, | |
| { | |
| "loss": 1.023, | |
| "grad_norm": 1.2859163284301758, | |
| "learning_rate": 1.6914481391367037e-05, | |
| "epoch": 0.92, | |
| "step": 60975 | |
| }, | |
| { | |
| "loss": 1.227, | |
| "grad_norm": 1.181386113166809, | |
| "learning_rate": 1.6839386020245712e-05, | |
| "epoch": 0.92, | |
| "step": 61000 | |
| }, | |
| { | |
| "loss": 1.1462, | |
| "grad_norm": 1.9334174394607544, | |
| "learning_rate": 1.676429064912439e-05, | |
| "epoch": 0.92, | |
| "step": 61025 | |
| }, | |
| { | |
| "loss": 1.0915, | |
| "grad_norm": 1.1935040950775146, | |
| "learning_rate": 1.6689195278003063e-05, | |
| "epoch": 0.92, | |
| "step": 61050 | |
| }, | |
| { | |
| "loss": 1.029, | |
| "grad_norm": 1.1765645742416382, | |
| "learning_rate": 1.6614099906881742e-05, | |
| "epoch": 0.92, | |
| "step": 61075 | |
| }, | |
| { | |
| "loss": 1.15, | |
| "grad_norm": 2.0349085330963135, | |
| "learning_rate": 1.6539004535760417e-05, | |
| "epoch": 0.92, | |
| "step": 61100 | |
| }, | |
| { | |
| "loss": 1.1565, | |
| "grad_norm": 2.330791711807251, | |
| "learning_rate": 1.6463909164639093e-05, | |
| "epoch": 0.92, | |
| "step": 61125 | |
| }, | |
| { | |
| "loss": 1.0929, | |
| "grad_norm": 1.6865901947021484, | |
| "learning_rate": 1.638881379351777e-05, | |
| "epoch": 0.92, | |
| "step": 61150 | |
| }, | |
| { | |
| "loss": 1.199, | |
| "grad_norm": 2.1335840225219727, | |
| "learning_rate": 1.6313718422396444e-05, | |
| "epoch": 0.92, | |
| "step": 61175 | |
| }, | |
| { | |
| "loss": 1.1875, | |
| "grad_norm": 0.9578272104263306, | |
| "learning_rate": 1.623862305127512e-05, | |
| "epoch": 0.92, | |
| "step": 61200 | |
| }, | |
| { | |
| "loss": 1.117, | |
| "grad_norm": 1.564257025718689, | |
| "learning_rate": 1.6163527680153795e-05, | |
| "epoch": 0.92, | |
| "step": 61225 | |
| }, | |
| { | |
| "loss": 1.1286, | |
| "grad_norm": 2.076204538345337, | |
| "learning_rate": 1.6088432309032474e-05, | |
| "epoch": 0.92, | |
| "step": 61250 | |
| }, | |
| { | |
| "loss": 1.1491, | |
| "grad_norm": 1.695163607597351, | |
| "learning_rate": 1.6013336937911146e-05, | |
| "epoch": 0.92, | |
| "step": 61275 | |
| }, | |
| { | |
| "loss": 1.1108, | |
| "grad_norm": 1.0644354820251465, | |
| "learning_rate": 1.5938241566789825e-05, | |
| "epoch": 0.92, | |
| "step": 61300 | |
| }, | |
| { | |
| "loss": 1.0865, | |
| "grad_norm": 1.13369619846344, | |
| "learning_rate": 1.58631461956685e-05, | |
| "epoch": 0.92, | |
| "step": 61325 | |
| }, | |
| { | |
| "loss": 1.09, | |
| "grad_norm": 0.8873293995857239, | |
| "learning_rate": 1.5788050824547176e-05, | |
| "epoch": 0.92, | |
| "step": 61350 | |
| }, | |
| { | |
| "loss": 1.1836, | |
| "grad_norm": 1.4285056591033936, | |
| "learning_rate": 1.571295545342585e-05, | |
| "epoch": 0.92, | |
| "step": 61375 | |
| }, | |
| { | |
| "loss": 1.1597, | |
| "grad_norm": 0.8853715658187866, | |
| "learning_rate": 1.563786008230453e-05, | |
| "epoch": 0.92, | |
| "step": 61400 | |
| }, | |
| { | |
| "loss": 1.1089, | |
| "grad_norm": 1.2858846187591553, | |
| "learning_rate": 1.5562764711183202e-05, | |
| "epoch": 0.92, | |
| "step": 61425 | |
| }, | |
| { | |
| "loss": 1.1041, | |
| "grad_norm": 1.2523924112319946, | |
| "learning_rate": 1.548766934006188e-05, | |
| "epoch": 0.92, | |
| "step": 61450 | |
| }, | |
| { | |
| "loss": 1.1523, | |
| "grad_norm": 1.9986999034881592, | |
| "learning_rate": 1.5412573968940556e-05, | |
| "epoch": 0.92, | |
| "step": 61475 | |
| }, | |
| { | |
| "loss": 1.0461, | |
| "grad_norm": 2.06295108795166, | |
| "learning_rate": 1.533747859781923e-05, | |
| "epoch": 0.92, | |
| "step": 61500 | |
| }, | |
| { | |
| "loss": 1.0988, | |
| "grad_norm": 0.9304774403572083, | |
| "learning_rate": 1.5262383226697907e-05, | |
| "epoch": 0.92, | |
| "step": 61525 | |
| }, | |
| { | |
| "loss": 1.1569, | |
| "grad_norm": 1.6813061237335205, | |
| "learning_rate": 1.5187287855576581e-05, | |
| "epoch": 0.92, | |
| "step": 61550 | |
| }, | |
| { | |
| "loss": 1.1434, | |
| "grad_norm": 1.320822834968567, | |
| "learning_rate": 1.511219248445526e-05, | |
| "epoch": 0.92, | |
| "step": 61575 | |
| }, | |
| { | |
| "loss": 1.0182, | |
| "grad_norm": 1.2629307508468628, | |
| "learning_rate": 1.5037097113333934e-05, | |
| "epoch": 0.93, | |
| "step": 61600 | |
| }, | |
| { | |
| "loss": 1.0842, | |
| "grad_norm": 2.044494390487671, | |
| "learning_rate": 1.4962001742212611e-05, | |
| "epoch": 0.93, | |
| "step": 61625 | |
| }, | |
| { | |
| "loss": 1.0935, | |
| "grad_norm": 1.1815024614334106, | |
| "learning_rate": 1.4886906371091286e-05, | |
| "epoch": 0.93, | |
| "step": 61650 | |
| }, | |
| { | |
| "loss": 1.1862, | |
| "grad_norm": 1.5776236057281494, | |
| "learning_rate": 1.4811810999969964e-05, | |
| "epoch": 0.93, | |
| "step": 61675 | |
| }, | |
| { | |
| "loss": 1.0616, | |
| "grad_norm": 2.1838979721069336, | |
| "learning_rate": 1.4736715628848637e-05, | |
| "epoch": 0.93, | |
| "step": 61700 | |
| }, | |
| { | |
| "loss": 1.1325, | |
| "grad_norm": 1.7168885469436646, | |
| "learning_rate": 1.4661620257727315e-05, | |
| "epoch": 0.93, | |
| "step": 61725 | |
| }, | |
| { | |
| "loss": 1.1015, | |
| "grad_norm": 1.0847703218460083, | |
| "learning_rate": 1.458652488660599e-05, | |
| "epoch": 0.93, | |
| "step": 61750 | |
| }, | |
| { | |
| "loss": 1.0838, | |
| "grad_norm": 1.3423173427581787, | |
| "learning_rate": 1.4511429515484667e-05, | |
| "epoch": 0.93, | |
| "step": 61775 | |
| }, | |
| { | |
| "loss": 1.1676, | |
| "grad_norm": 1.405914306640625, | |
| "learning_rate": 1.4436334144363343e-05, | |
| "epoch": 0.93, | |
| "step": 61800 | |
| }, | |
| { | |
| "loss": 1.1124, | |
| "grad_norm": 1.570953607559204, | |
| "learning_rate": 1.4361238773242017e-05, | |
| "epoch": 0.93, | |
| "step": 61825 | |
| }, | |
| { | |
| "loss": 1.1462, | |
| "grad_norm": 2.7975118160247803, | |
| "learning_rate": 1.4286143402120694e-05, | |
| "epoch": 0.93, | |
| "step": 61850 | |
| }, | |
| { | |
| "loss": 1.117, | |
| "grad_norm": 3.7135069370269775, | |
| "learning_rate": 1.421104803099937e-05, | |
| "epoch": 0.93, | |
| "step": 61875 | |
| }, | |
| { | |
| "loss": 1.1057, | |
| "grad_norm": 1.8608477115631104, | |
| "learning_rate": 1.4135952659878046e-05, | |
| "epoch": 0.93, | |
| "step": 61900 | |
| }, | |
| { | |
| "loss": 1.0607, | |
| "grad_norm": 1.1181379556655884, | |
| "learning_rate": 1.406085728875672e-05, | |
| "epoch": 0.93, | |
| "step": 61925 | |
| }, | |
| { | |
| "loss": 1.1118, | |
| "grad_norm": 1.5385795831680298, | |
| "learning_rate": 1.3985761917635399e-05, | |
| "epoch": 0.93, | |
| "step": 61950 | |
| }, | |
| { | |
| "loss": 1.1192, | |
| "grad_norm": 1.4369099140167236, | |
| "learning_rate": 1.3910666546514073e-05, | |
| "epoch": 0.93, | |
| "step": 61975 | |
| }, | |
| { | |
| "loss": 1.1036, | |
| "grad_norm": 1.2244880199432373, | |
| "learning_rate": 1.383557117539275e-05, | |
| "epoch": 0.93, | |
| "step": 62000 | |
| }, | |
| { | |
| "loss": 1.0913, | |
| "grad_norm": 1.3832460641860962, | |
| "learning_rate": 1.3760475804271425e-05, | |
| "epoch": 0.93, | |
| "step": 62025 | |
| }, | |
| { | |
| "loss": 1.1684, | |
| "grad_norm": 0.9169008135795593, | |
| "learning_rate": 1.3685380433150103e-05, | |
| "epoch": 0.93, | |
| "step": 62050 | |
| }, | |
| { | |
| "loss": 1.1886, | |
| "grad_norm": 2.110548973083496, | |
| "learning_rate": 1.3610285062028776e-05, | |
| "epoch": 0.93, | |
| "step": 62075 | |
| }, | |
| { | |
| "loss": 1.1511, | |
| "grad_norm": 1.203637957572937, | |
| "learning_rate": 1.3535189690907454e-05, | |
| "epoch": 0.93, | |
| "step": 62100 | |
| }, | |
| { | |
| "loss": 1.1469, | |
| "grad_norm": 1.3341647386550903, | |
| "learning_rate": 1.3460094319786129e-05, | |
| "epoch": 0.93, | |
| "step": 62125 | |
| }, | |
| { | |
| "loss": 1.1022, | |
| "grad_norm": 1.5815610885620117, | |
| "learning_rate": 1.3384998948664803e-05, | |
| "epoch": 0.93, | |
| "step": 62150 | |
| }, | |
| { | |
| "loss": 1.0537, | |
| "grad_norm": 1.7284424304962158, | |
| "learning_rate": 1.3309903577543482e-05, | |
| "epoch": 0.93, | |
| "step": 62175 | |
| }, | |
| { | |
| "loss": 1.0917, | |
| "grad_norm": 1.2951127290725708, | |
| "learning_rate": 1.3234808206422156e-05, | |
| "epoch": 0.93, | |
| "step": 62200 | |
| }, | |
| { | |
| "loss": 1.0818, | |
| "grad_norm": 1.735390305519104, | |
| "learning_rate": 1.3159712835300833e-05, | |
| "epoch": 0.93, | |
| "step": 62225 | |
| }, | |
| { | |
| "loss": 1.1024, | |
| "grad_norm": 0.9933769702911377, | |
| "learning_rate": 1.3084617464179508e-05, | |
| "epoch": 0.93, | |
| "step": 62250 | |
| }, | |
| { | |
| "loss": 1.1004, | |
| "grad_norm": 1.7689695358276367, | |
| "learning_rate": 1.3009522093058185e-05, | |
| "epoch": 0.94, | |
| "step": 62275 | |
| }, | |
| { | |
| "loss": 1.1127, | |
| "grad_norm": 1.0094436407089233, | |
| "learning_rate": 1.2934426721936859e-05, | |
| "epoch": 0.94, | |
| "step": 62300 | |
| }, | |
| { | |
| "loss": 1.086, | |
| "grad_norm": 1.3532946109771729, | |
| "learning_rate": 1.2859331350815538e-05, | |
| "epoch": 0.94, | |
| "step": 62325 | |
| }, | |
| { | |
| "loss": 1.1568, | |
| "grad_norm": 1.7976974248886108, | |
| "learning_rate": 1.2784235979694212e-05, | |
| "epoch": 0.94, | |
| "step": 62350 | |
| }, | |
| { | |
| "loss": 1.1215, | |
| "grad_norm": 1.748487114906311, | |
| "learning_rate": 1.2709140608572889e-05, | |
| "epoch": 0.94, | |
| "step": 62375 | |
| }, | |
| { | |
| "loss": 1.0682, | |
| "grad_norm": 1.3425058126449585, | |
| "learning_rate": 1.2634045237451564e-05, | |
| "epoch": 0.94, | |
| "step": 62400 | |
| }, | |
| { | |
| "loss": 1.0849, | |
| "grad_norm": 2.302241802215576, | |
| "learning_rate": 1.2558949866330242e-05, | |
| "epoch": 0.94, | |
| "step": 62425 | |
| }, | |
| { | |
| "loss": 1.0932, | |
| "grad_norm": 1.4172135591506958, | |
| "learning_rate": 1.2483854495208915e-05, | |
| "epoch": 0.94, | |
| "step": 62450 | |
| }, | |
| { | |
| "loss": 1.1309, | |
| "grad_norm": 1.8036898374557495, | |
| "learning_rate": 1.2408759124087593e-05, | |
| "epoch": 0.94, | |
| "step": 62475 | |
| }, | |
| { | |
| "loss": 1.1123, | |
| "grad_norm": 2.0429811477661133, | |
| "learning_rate": 1.2333663752966268e-05, | |
| "epoch": 0.94, | |
| "step": 62500 | |
| }, | |
| { | |
| "loss": 1.1829, | |
| "grad_norm": 1.2276302576065063, | |
| "learning_rate": 1.2258568381844944e-05, | |
| "epoch": 0.94, | |
| "step": 62525 | |
| }, | |
| { | |
| "loss": 1.1149, | |
| "grad_norm": 1.4691849946975708, | |
| "learning_rate": 1.218347301072362e-05, | |
| "epoch": 0.94, | |
| "step": 62550 | |
| }, | |
| { | |
| "loss": 1.1291, | |
| "grad_norm": 1.780098557472229, | |
| "learning_rate": 1.2108377639602296e-05, | |
| "epoch": 0.94, | |
| "step": 62575 | |
| }, | |
| { | |
| "loss": 1.0855, | |
| "grad_norm": 1.4932245016098022, | |
| "learning_rate": 1.2033282268480972e-05, | |
| "epoch": 0.94, | |
| "step": 62600 | |
| }, | |
| { | |
| "loss": 1.0832, | |
| "grad_norm": 1.277098536491394, | |
| "learning_rate": 1.1958186897359647e-05, | |
| "epoch": 0.94, | |
| "step": 62625 | |
| }, | |
| { | |
| "loss": 1.2369, | |
| "grad_norm": 1.7345349788665771, | |
| "learning_rate": 1.1883091526238323e-05, | |
| "epoch": 0.94, | |
| "step": 62650 | |
| }, | |
| { | |
| "loss": 1.1604, | |
| "grad_norm": 1.9038455486297607, | |
| "learning_rate": 1.1807996155116998e-05, | |
| "epoch": 0.94, | |
| "step": 62675 | |
| }, | |
| { | |
| "loss": 1.1065, | |
| "grad_norm": 1.0243260860443115, | |
| "learning_rate": 1.1732900783995675e-05, | |
| "epoch": 0.94, | |
| "step": 62700 | |
| }, | |
| { | |
| "loss": 1.1043, | |
| "grad_norm": 0.9342716336250305, | |
| "learning_rate": 1.165780541287435e-05, | |
| "epoch": 0.94, | |
| "step": 62725 | |
| }, | |
| { | |
| "loss": 1.1245, | |
| "grad_norm": 1.554945707321167, | |
| "learning_rate": 1.1582710041753026e-05, | |
| "epoch": 0.94, | |
| "step": 62750 | |
| }, | |
| { | |
| "loss": 1.074, | |
| "grad_norm": 1.1340545415878296, | |
| "learning_rate": 1.1507614670631703e-05, | |
| "epoch": 0.94, | |
| "step": 62775 | |
| }, | |
| { | |
| "loss": 1.1226, | |
| "grad_norm": 2.2141757011413574, | |
| "learning_rate": 1.1432519299510379e-05, | |
| "epoch": 0.94, | |
| "step": 62800 | |
| }, | |
| { | |
| "loss": 1.1022, | |
| "grad_norm": 1.2455902099609375, | |
| "learning_rate": 1.1357423928389054e-05, | |
| "epoch": 0.94, | |
| "step": 62825 | |
| }, | |
| { | |
| "loss": 1.135, | |
| "grad_norm": 1.0841847658157349, | |
| "learning_rate": 1.1282328557267732e-05, | |
| "epoch": 0.94, | |
| "step": 62850 | |
| }, | |
| { | |
| "loss": 1.1242, | |
| "grad_norm": 2.3354759216308594, | |
| "learning_rate": 1.1207233186146407e-05, | |
| "epoch": 0.94, | |
| "step": 62875 | |
| }, | |
| { | |
| "loss": 1.1036, | |
| "grad_norm": 1.0070022344589233, | |
| "learning_rate": 1.1132137815025083e-05, | |
| "epoch": 0.94, | |
| "step": 62900 | |
| }, | |
| { | |
| "loss": 1.0723, | |
| "grad_norm": 1.8489924669265747, | |
| "learning_rate": 1.105704244390376e-05, | |
| "epoch": 0.95, | |
| "step": 62925 | |
| }, | |
| { | |
| "loss": 1.0952, | |
| "grad_norm": 1.4337140321731567, | |
| "learning_rate": 1.0981947072782433e-05, | |
| "epoch": 0.95, | |
| "step": 62950 | |
| }, | |
| { | |
| "loss": 1.092, | |
| "grad_norm": 1.2222257852554321, | |
| "learning_rate": 1.0906851701661109e-05, | |
| "epoch": 0.95, | |
| "step": 62975 | |
| }, | |
| { | |
| "loss": 1.1448, | |
| "grad_norm": 1.270473837852478, | |
| "learning_rate": 1.0831756330539786e-05, | |
| "epoch": 0.95, | |
| "step": 63000 | |
| }, | |
| { | |
| "loss": 1.186, | |
| "grad_norm": 2.165717363357544, | |
| "learning_rate": 1.0756660959418462e-05, | |
| "epoch": 0.95, | |
| "step": 63025 | |
| }, | |
| { | |
| "loss": 1.0859, | |
| "grad_norm": 1.2544116973876953, | |
| "learning_rate": 1.0681565588297137e-05, | |
| "epoch": 0.95, | |
| "step": 63050 | |
| }, | |
| { | |
| "loss": 1.1105, | |
| "grad_norm": 1.404388666152954, | |
| "learning_rate": 1.0606470217175814e-05, | |
| "epoch": 0.95, | |
| "step": 63075 | |
| }, | |
| { | |
| "loss": 1.1805, | |
| "grad_norm": 1.3540233373641968, | |
| "learning_rate": 1.0534378660899343e-05, | |
| "epoch": 0.95, | |
| "step": 63100 | |
| }, | |
| { | |
| "loss": 1.1223, | |
| "grad_norm": 1.7471164464950562, | |
| "learning_rate": 1.0459283289778018e-05, | |
| "epoch": 0.95, | |
| "step": 63125 | |
| }, | |
| { | |
| "loss": 1.0884, | |
| "grad_norm": 1.466888189315796, | |
| "learning_rate": 1.0384187918656695e-05, | |
| "epoch": 0.95, | |
| "step": 63150 | |
| }, | |
| { | |
| "loss": 1.1307, | |
| "grad_norm": 1.0170552730560303, | |
| "learning_rate": 1.0309092547535371e-05, | |
| "epoch": 0.95, | |
| "step": 63175 | |
| }, | |
| { | |
| "loss": 1.0969, | |
| "grad_norm": 1.400824785232544, | |
| "learning_rate": 1.0233997176414046e-05, | |
| "epoch": 0.95, | |
| "step": 63200 | |
| }, | |
| { | |
| "loss": 1.1182, | |
| "grad_norm": 1.231128454208374, | |
| "learning_rate": 1.0158901805292722e-05, | |
| "epoch": 0.95, | |
| "step": 63225 | |
| }, | |
| { | |
| "loss": 1.1886, | |
| "grad_norm": 1.5293277502059937, | |
| "learning_rate": 1.0083806434171399e-05, | |
| "epoch": 0.95, | |
| "step": 63250 | |
| }, | |
| { | |
| "loss": 1.1112, | |
| "grad_norm": 1.315816879272461, | |
| "learning_rate": 1.0008711063050074e-05, | |
| "epoch": 0.95, | |
| "step": 63275 | |
| }, | |
| { | |
| "loss": 1.1224, | |
| "grad_norm": 1.0503865480422974, | |
| "learning_rate": 9.93361569192875e-06, | |
| "epoch": 0.95, | |
| "step": 63300 | |
| }, | |
| { | |
| "loss": 1.1516, | |
| "grad_norm": 1.5667177438735962, | |
| "learning_rate": 9.858520320807425e-06, | |
| "epoch": 0.95, | |
| "step": 63325 | |
| }, | |
| { | |
| "loss": 1.2137, | |
| "grad_norm": 1.9724977016448975, | |
| "learning_rate": 9.783424949686101e-06, | |
| "epoch": 0.95, | |
| "step": 63350 | |
| }, | |
| { | |
| "loss": 1.1568, | |
| "grad_norm": 1.0087287425994873, | |
| "learning_rate": 9.708329578564778e-06, | |
| "epoch": 0.95, | |
| "step": 63375 | |
| }, | |
| { | |
| "loss": 1.0902, | |
| "grad_norm": 1.067909836769104, | |
| "learning_rate": 9.633234207443454e-06, | |
| "epoch": 0.95, | |
| "step": 63400 | |
| }, | |
| { | |
| "loss": 1.1043, | |
| "grad_norm": 2.0196101665496826, | |
| "learning_rate": 9.558138836322129e-06, | |
| "epoch": 0.95, | |
| "step": 63425 | |
| }, | |
| { | |
| "loss": 1.0683, | |
| "grad_norm": 1.6897556781768799, | |
| "learning_rate": 9.483043465200806e-06, | |
| "epoch": 0.95, | |
| "step": 63450 | |
| }, | |
| { | |
| "loss": 1.1969, | |
| "grad_norm": 1.4092940092086792, | |
| "learning_rate": 9.407948094079482e-06, | |
| "epoch": 0.95, | |
| "step": 63475 | |
| }, | |
| { | |
| "loss": 1.1159, | |
| "grad_norm": 1.5447856187820435, | |
| "learning_rate": 9.332852722958157e-06, | |
| "epoch": 0.95, | |
| "step": 63500 | |
| }, | |
| { | |
| "loss": 1.119, | |
| "grad_norm": 1.5372124910354614, | |
| "learning_rate": 9.257757351836834e-06, | |
| "epoch": 0.95, | |
| "step": 63525 | |
| }, | |
| { | |
| "loss": 1.1478, | |
| "grad_norm": 1.2936185598373413, | |
| "learning_rate": 9.18266198071551e-06, | |
| "epoch": 0.95, | |
| "step": 63550 | |
| }, | |
| { | |
| "loss": 1.1206, | |
| "grad_norm": 0.9974470138549805, | |
| "learning_rate": 9.107566609594185e-06, | |
| "epoch": 0.95, | |
| "step": 63575 | |
| }, | |
| { | |
| "loss": 1.1306, | |
| "grad_norm": 1.8973299264907837, | |
| "learning_rate": 9.03247123847286e-06, | |
| "epoch": 0.96, | |
| "step": 63600 | |
| }, | |
| { | |
| "loss": 1.1003, | |
| "grad_norm": 1.2269550561904907, | |
| "learning_rate": 8.957375867351536e-06, | |
| "epoch": 0.96, | |
| "step": 63625 | |
| }, | |
| { | |
| "loss": 1.089, | |
| "grad_norm": 0.9575774073600769, | |
| "learning_rate": 8.882280496230212e-06, | |
| "epoch": 0.96, | |
| "step": 63650 | |
| }, | |
| { | |
| "loss": 1.1122, | |
| "grad_norm": 1.47458016872406, | |
| "learning_rate": 8.807185125108889e-06, | |
| "epoch": 0.96, | |
| "step": 63675 | |
| }, | |
| { | |
| "loss": 1.0881, | |
| "grad_norm": 1.407483696937561, | |
| "learning_rate": 8.732089753987564e-06, | |
| "epoch": 0.96, | |
| "step": 63700 | |
| }, | |
| { | |
| "loss": 1.1247, | |
| "grad_norm": 1.4554179906845093, | |
| "learning_rate": 8.65699438286624e-06, | |
| "epoch": 0.96, | |
| "step": 63725 | |
| }, | |
| { | |
| "loss": 1.1963, | |
| "grad_norm": 1.2854880094528198, | |
| "learning_rate": 8.581899011744917e-06, | |
| "epoch": 0.96, | |
| "step": 63750 | |
| }, | |
| { | |
| "loss": 1.1419, | |
| "grad_norm": 1.089011311531067, | |
| "learning_rate": 8.506803640623593e-06, | |
| "epoch": 0.96, | |
| "step": 63775 | |
| }, | |
| { | |
| "loss": 1.0494, | |
| "grad_norm": 1.1109488010406494, | |
| "learning_rate": 8.431708269502268e-06, | |
| "epoch": 0.96, | |
| "step": 63800 | |
| }, | |
| { | |
| "loss": 1.084, | |
| "grad_norm": 1.5390805006027222, | |
| "learning_rate": 8.356612898380945e-06, | |
| "epoch": 0.96, | |
| "step": 63825 | |
| }, | |
| { | |
| "loss": 1.0779, | |
| "grad_norm": 1.3624422550201416, | |
| "learning_rate": 8.28151752725962e-06, | |
| "epoch": 0.96, | |
| "step": 63850 | |
| }, | |
| { | |
| "loss": 1.092, | |
| "grad_norm": 1.3689720630645752, | |
| "learning_rate": 8.206422156138296e-06, | |
| "epoch": 0.96, | |
| "step": 63875 | |
| }, | |
| { | |
| "loss": 1.1746, | |
| "grad_norm": 1.2376459836959839, | |
| "learning_rate": 8.131326785016973e-06, | |
| "epoch": 0.96, | |
| "step": 63900 | |
| }, | |
| { | |
| "loss": 1.147, | |
| "grad_norm": 1.5905089378356934, | |
| "learning_rate": 8.056231413895649e-06, | |
| "epoch": 0.96, | |
| "step": 63925 | |
| }, | |
| { | |
| "loss": 1.1585, | |
| "grad_norm": 2.2680752277374268, | |
| "learning_rate": 7.981136042774323e-06, | |
| "epoch": 0.96, | |
| "step": 63950 | |
| }, | |
| { | |
| "loss": 1.1892, | |
| "grad_norm": 1.5471032857894897, | |
| "learning_rate": 7.906040671653e-06, | |
| "epoch": 0.96, | |
| "step": 63975 | |
| }, | |
| { | |
| "loss": 1.1173, | |
| "grad_norm": 1.456756591796875, | |
| "learning_rate": 7.830945300531675e-06, | |
| "epoch": 0.96, | |
| "step": 64000 | |
| }, | |
| { | |
| "loss": 1.0896, | |
| "grad_norm": 1.550498604774475, | |
| "learning_rate": 7.75584992941035e-06, | |
| "epoch": 0.96, | |
| "step": 64025 | |
| }, | |
| { | |
| "loss": 1.0944, | |
| "grad_norm": 1.8201286792755127, | |
| "learning_rate": 7.680754558289028e-06, | |
| "epoch": 0.96, | |
| "step": 64050 | |
| }, | |
| { | |
| "loss": 1.1145, | |
| "grad_norm": 1.392923355102539, | |
| "learning_rate": 7.605659187167703e-06, | |
| "epoch": 0.96, | |
| "step": 64075 | |
| }, | |
| { | |
| "loss": 1.103, | |
| "grad_norm": 2.5812623500823975, | |
| "learning_rate": 7.53056381604638e-06, | |
| "epoch": 0.96, | |
| "step": 64100 | |
| }, | |
| { | |
| "loss": 1.1365, | |
| "grad_norm": 1.7856642007827759, | |
| "learning_rate": 7.455468444925055e-06, | |
| "epoch": 0.96, | |
| "step": 64125 | |
| }, | |
| { | |
| "loss": 1.0761, | |
| "grad_norm": 1.8361400365829468, | |
| "learning_rate": 7.3803730738037315e-06, | |
| "epoch": 0.96, | |
| "step": 64150 | |
| }, | |
| { | |
| "loss": 1.0984, | |
| "grad_norm": 1.183370590209961, | |
| "learning_rate": 7.305277702682407e-06, | |
| "epoch": 0.96, | |
| "step": 64175 | |
| }, | |
| { | |
| "loss": 1.1064, | |
| "grad_norm": 1.8606791496276855, | |
| "learning_rate": 7.230182331561083e-06, | |
| "epoch": 0.96, | |
| "step": 64200 | |
| }, | |
| { | |
| "loss": 1.1489, | |
| "grad_norm": 1.3013999462127686, | |
| "learning_rate": 7.15508696043976e-06, | |
| "epoch": 0.96, | |
| "step": 64225 | |
| }, | |
| { | |
| "loss": 1.0736, | |
| "grad_norm": 1.1197832822799683, | |
| "learning_rate": 7.079991589318435e-06, | |
| "epoch": 0.96, | |
| "step": 64250 | |
| }, | |
| { | |
| "loss": 1.05, | |
| "grad_norm": 1.160477876663208, | |
| "learning_rate": 7.00489621819711e-06, | |
| "epoch": 0.97, | |
| "step": 64275 | |
| }, | |
| { | |
| "loss": 1.1556, | |
| "grad_norm": 1.7113288640975952, | |
| "learning_rate": 6.929800847075786e-06, | |
| "epoch": 0.97, | |
| "step": 64300 | |
| }, | |
| { | |
| "loss": 1.154, | |
| "grad_norm": 0.7315987348556519, | |
| "learning_rate": 6.854705475954462e-06, | |
| "epoch": 0.97, | |
| "step": 64325 | |
| }, | |
| { | |
| "loss": 1.086, | |
| "grad_norm": 1.7214363813400269, | |
| "learning_rate": 6.779610104833138e-06, | |
| "epoch": 0.97, | |
| "step": 64350 | |
| }, | |
| { | |
| "loss": 1.0921, | |
| "grad_norm": 0.8723170161247253, | |
| "learning_rate": 6.704514733711814e-06, | |
| "epoch": 0.97, | |
| "step": 64375 | |
| }, | |
| { | |
| "loss": 1.1255, | |
| "grad_norm": 1.9772207736968994, | |
| "learning_rate": 6.6294193625904905e-06, | |
| "epoch": 0.97, | |
| "step": 64400 | |
| }, | |
| { | |
| "loss": 1.1423, | |
| "grad_norm": 2.272956371307373, | |
| "learning_rate": 6.554323991469166e-06, | |
| "epoch": 0.97, | |
| "step": 64425 | |
| }, | |
| { | |
| "loss": 1.1113, | |
| "grad_norm": 1.6277108192443848, | |
| "learning_rate": 6.479228620347842e-06, | |
| "epoch": 0.97, | |
| "step": 64450 | |
| }, | |
| { | |
| "loss": 1.0637, | |
| "grad_norm": 1.5888078212738037, | |
| "learning_rate": 6.404133249226519e-06, | |
| "epoch": 0.97, | |
| "step": 64475 | |
| }, | |
| { | |
| "loss": 1.1832, | |
| "grad_norm": 1.4354815483093262, | |
| "learning_rate": 6.329037878105194e-06, | |
| "epoch": 0.97, | |
| "step": 64500 | |
| }, | |
| { | |
| "loss": 1.0812, | |
| "grad_norm": 1.2866464853286743, | |
| "learning_rate": 6.2539425069838705e-06, | |
| "epoch": 0.97, | |
| "step": 64525 | |
| }, | |
| { | |
| "loss": 1.0742, | |
| "grad_norm": 1.206624984741211, | |
| "learning_rate": 6.178847135862546e-06, | |
| "epoch": 0.97, | |
| "step": 64550 | |
| }, | |
| { | |
| "loss": 1.1, | |
| "grad_norm": 1.9013807773590088, | |
| "learning_rate": 6.1037517647412214e-06, | |
| "epoch": 0.97, | |
| "step": 64575 | |
| }, | |
| { | |
| "loss": 1.1164, | |
| "grad_norm": 1.2918732166290283, | |
| "learning_rate": 6.028656393619898e-06, | |
| "epoch": 0.97, | |
| "step": 64600 | |
| }, | |
| { | |
| "loss": 1.126, | |
| "grad_norm": 0.9611725211143494, | |
| "learning_rate": 5.953561022498574e-06, | |
| "epoch": 0.97, | |
| "step": 64625 | |
| }, | |
| { | |
| "loss": 1.1867, | |
| "grad_norm": 1.8491181135177612, | |
| "learning_rate": 5.8784656513772496e-06, | |
| "epoch": 0.97, | |
| "step": 64650 | |
| }, | |
| { | |
| "loss": 1.1415, | |
| "grad_norm": 1.3857682943344116, | |
| "learning_rate": 5.803370280255925e-06, | |
| "epoch": 0.97, | |
| "step": 64675 | |
| }, | |
| { | |
| "loss": 1.1016, | |
| "grad_norm": 1.7419966459274292, | |
| "learning_rate": 5.728274909134601e-06, | |
| "epoch": 0.97, | |
| "step": 64700 | |
| }, | |
| { | |
| "loss": 1.1174, | |
| "grad_norm": 2.3053975105285645, | |
| "learning_rate": 5.653179538013277e-06, | |
| "epoch": 0.97, | |
| "step": 64725 | |
| }, | |
| { | |
| "loss": 1.125, | |
| "grad_norm": 1.3925187587738037, | |
| "learning_rate": 5.578084166891953e-06, | |
| "epoch": 0.97, | |
| "step": 64750 | |
| }, | |
| { | |
| "loss": 1.0828, | |
| "grad_norm": 2.014289140701294, | |
| "learning_rate": 5.5029887957706295e-06, | |
| "epoch": 0.97, | |
| "step": 64775 | |
| }, | |
| { | |
| "loss": 1.1461, | |
| "grad_norm": 2.213609457015991, | |
| "learning_rate": 5.427893424649305e-06, | |
| "epoch": 0.97, | |
| "step": 64800 | |
| }, | |
| { | |
| "loss": 1.0558, | |
| "grad_norm": 1.0734851360321045, | |
| "learning_rate": 5.3527980535279805e-06, | |
| "epoch": 0.97, | |
| "step": 64825 | |
| }, | |
| { | |
| "loss": 1.1006, | |
| "grad_norm": 1.362658977508545, | |
| "learning_rate": 5.277702682406657e-06, | |
| "epoch": 0.97, | |
| "step": 64850 | |
| }, | |
| { | |
| "loss": 1.1512, | |
| "grad_norm": 1.9621925354003906, | |
| "learning_rate": 5.202607311285332e-06, | |
| "epoch": 0.97, | |
| "step": 64875 | |
| }, | |
| { | |
| "loss": 1.0506, | |
| "grad_norm": 1.6093008518218994, | |
| "learning_rate": 5.127511940164009e-06, | |
| "epoch": 0.97, | |
| "step": 64900 | |
| }, | |
| { | |
| "loss": 1.174, | |
| "grad_norm": 2.4825665950775146, | |
| "learning_rate": 5.052416569042685e-06, | |
| "epoch": 0.98, | |
| "step": 64925 | |
| }, | |
| { | |
| "loss": 1.0542, | |
| "grad_norm": 1.142391562461853, | |
| "learning_rate": 4.97732119792136e-06, | |
| "epoch": 0.98, | |
| "step": 64950 | |
| }, | |
| { | |
| "loss": 1.158, | |
| "grad_norm": 2.0994620323181152, | |
| "learning_rate": 4.902225826800036e-06, | |
| "epoch": 0.98, | |
| "step": 64975 | |
| }, | |
| { | |
| "loss": 1.1055, | |
| "grad_norm": 1.4533177614212036, | |
| "learning_rate": 4.827130455678712e-06, | |
| "epoch": 0.98, | |
| "step": 65000 | |
| }, | |
| { | |
| "loss": 1.1457, | |
| "grad_norm": 2.113051176071167, | |
| "learning_rate": 4.752035084557388e-06, | |
| "epoch": 0.98, | |
| "step": 65025 | |
| }, | |
| { | |
| "loss": 1.1301, | |
| "grad_norm": 1.4814103841781616, | |
| "learning_rate": 4.676939713436064e-06, | |
| "epoch": 0.98, | |
| "step": 65050 | |
| }, | |
| { | |
| "loss": 1.1091, | |
| "grad_norm": 1.3998606204986572, | |
| "learning_rate": 4.60184434231474e-06, | |
| "epoch": 0.98, | |
| "step": 65075 | |
| }, | |
| { | |
| "loss": 1.1468, | |
| "grad_norm": 1.4728342294692993, | |
| "learning_rate": 4.526748971193416e-06, | |
| "epoch": 0.98, | |
| "step": 65100 | |
| }, | |
| { | |
| "loss": 1.1008, | |
| "grad_norm": 1.29282808303833, | |
| "learning_rate": 4.451653600072092e-06, | |
| "epoch": 0.98, | |
| "step": 65125 | |
| }, | |
| { | |
| "loss": 1.0818, | |
| "grad_norm": 0.9691277146339417, | |
| "learning_rate": 4.376558228950768e-06, | |
| "epoch": 0.98, | |
| "step": 65150 | |
| }, | |
| { | |
| "loss": 1.1651, | |
| "grad_norm": 1.5705621242523193, | |
| "learning_rate": 4.301462857829443e-06, | |
| "epoch": 0.98, | |
| "step": 65175 | |
| }, | |
| { | |
| "loss": 1.0648, | |
| "grad_norm": 1.7766458988189697, | |
| "learning_rate": 4.2263674867081194e-06, | |
| "epoch": 0.98, | |
| "step": 65200 | |
| }, | |
| { | |
| "loss": 1.0789, | |
| "grad_norm": 1.3525621891021729, | |
| "learning_rate": 4.151272115586796e-06, | |
| "epoch": 0.98, | |
| "step": 65225 | |
| }, | |
| { | |
| "loss": 1.0551, | |
| "grad_norm": 1.631650447845459, | |
| "learning_rate": 4.076176744465471e-06, | |
| "epoch": 0.98, | |
| "step": 65250 | |
| }, | |
| { | |
| "loss": 1.1308, | |
| "grad_norm": 1.7099614143371582, | |
| "learning_rate": 4.0010813733441476e-06, | |
| "epoch": 0.98, | |
| "step": 65275 | |
| }, | |
| { | |
| "loss": 1.1203, | |
| "grad_norm": 1.104038119316101, | |
| "learning_rate": 3.925986002222823e-06, | |
| "epoch": 0.98, | |
| "step": 65300 | |
| }, | |
| { | |
| "loss": 1.0968, | |
| "grad_norm": 1.4031529426574707, | |
| "learning_rate": 3.8508906311014985e-06, | |
| "epoch": 0.98, | |
| "step": 65325 | |
| }, | |
| { | |
| "loss": 1.15, | |
| "grad_norm": 2.0685653686523438, | |
| "learning_rate": 3.775795259980175e-06, | |
| "epoch": 0.98, | |
| "step": 65350 | |
| }, | |
| { | |
| "loss": 1.0778, | |
| "grad_norm": 1.4602687358856201, | |
| "learning_rate": 3.7006998888588508e-06, | |
| "epoch": 0.98, | |
| "step": 65375 | |
| }, | |
| { | |
| "loss": 1.1119, | |
| "grad_norm": 1.377066969871521, | |
| "learning_rate": 3.625604517737527e-06, | |
| "epoch": 0.98, | |
| "step": 65400 | |
| }, | |
| { | |
| "loss": 1.1806, | |
| "grad_norm": 1.3793482780456543, | |
| "learning_rate": 3.550509146616203e-06, | |
| "epoch": 0.98, | |
| "step": 65425 | |
| }, | |
| { | |
| "loss": 1.0924, | |
| "grad_norm": 1.323262095451355, | |
| "learning_rate": 3.4754137754948785e-06, | |
| "epoch": 0.98, | |
| "step": 65450 | |
| }, | |
| { | |
| "loss": 1.1011, | |
| "grad_norm": 1.6005733013153076, | |
| "learning_rate": 3.4003184043735544e-06, | |
| "epoch": 0.98, | |
| "step": 65475 | |
| }, | |
| { | |
| "loss": 1.1188, | |
| "grad_norm": 1.2906062602996826, | |
| "learning_rate": 3.3252230332522303e-06, | |
| "epoch": 0.98, | |
| "step": 65500 | |
| }, | |
| { | |
| "loss": 1.0887, | |
| "grad_norm": 2.869511365890503, | |
| "learning_rate": 3.250127662130906e-06, | |
| "epoch": 0.98, | |
| "step": 65525 | |
| }, | |
| { | |
| "loss": 1.1348, | |
| "grad_norm": 1.084037184715271, | |
| "learning_rate": 3.1750322910095825e-06, | |
| "epoch": 0.98, | |
| "step": 65550 | |
| }, | |
| { | |
| "loss": 1.1218, | |
| "grad_norm": 1.7096983194351196, | |
| "learning_rate": 3.099936919888258e-06, | |
| "epoch": 0.98, | |
| "step": 65575 | |
| }, | |
| { | |
| "loss": 1.134, | |
| "grad_norm": 2.19433856010437, | |
| "learning_rate": 3.0278453636117873e-06, | |
| "epoch": 0.99, | |
| "step": 65600 | |
| }, | |
| { | |
| "loss": 1.1709, | |
| "grad_norm": 2.7771689891815186, | |
| "learning_rate": 2.9527499924904628e-06, | |
| "epoch": 0.99, | |
| "step": 65625 | |
| }, | |
| { | |
| "loss": 1.1184, | |
| "grad_norm": 1.367202877998352, | |
| "learning_rate": 2.877654621369139e-06, | |
| "epoch": 0.99, | |
| "step": 65650 | |
| }, | |
| { | |
| "loss": 1.1393, | |
| "grad_norm": 1.163167953491211, | |
| "learning_rate": 2.802559250247815e-06, | |
| "epoch": 0.99, | |
| "step": 65675 | |
| }, | |
| { | |
| "loss": 1.1185, | |
| "grad_norm": 1.9196585416793823, | |
| "learning_rate": 2.727463879126491e-06, | |
| "epoch": 0.99, | |
| "step": 65700 | |
| }, | |
| { | |
| "loss": 1.0776, | |
| "grad_norm": 1.1097601652145386, | |
| "learning_rate": 2.652368508005167e-06, | |
| "epoch": 0.99, | |
| "step": 65725 | |
| }, | |
| { | |
| "loss": 1.1115, | |
| "grad_norm": 1.8407388925552368, | |
| "learning_rate": 2.5772731368838427e-06, | |
| "epoch": 0.99, | |
| "step": 65750 | |
| }, | |
| { | |
| "loss": 1.1397, | |
| "grad_norm": 1.3508464097976685, | |
| "learning_rate": 2.5021777657625186e-06, | |
| "epoch": 0.99, | |
| "step": 65775 | |
| }, | |
| { | |
| "loss": 1.0408, | |
| "grad_norm": 1.3656666278839111, | |
| "learning_rate": 2.4270823946411945e-06, | |
| "epoch": 0.99, | |
| "step": 65800 | |
| }, | |
| { | |
| "loss": 1.1232, | |
| "grad_norm": 1.121551275253296, | |
| "learning_rate": 2.3519870235198704e-06, | |
| "epoch": 0.99, | |
| "step": 65825 | |
| }, | |
| { | |
| "loss": 1.1695, | |
| "grad_norm": 3.1583876609802246, | |
| "learning_rate": 2.2768916523985463e-06, | |
| "epoch": 0.99, | |
| "step": 65850 | |
| }, | |
| { | |
| "loss": 1.114, | |
| "grad_norm": 1.4626102447509766, | |
| "learning_rate": 2.2017962812772222e-06, | |
| "epoch": 0.99, | |
| "step": 65875 | |
| }, | |
| { | |
| "loss": 1.1404, | |
| "grad_norm": 1.164562702178955, | |
| "learning_rate": 2.126700910155898e-06, | |
| "epoch": 0.99, | |
| "step": 65900 | |
| }, | |
| { | |
| "loss": 1.0749, | |
| "grad_norm": 1.151390790939331, | |
| "learning_rate": 2.051605539034574e-06, | |
| "epoch": 0.99, | |
| "step": 65925 | |
| }, | |
| { | |
| "loss": 1.1223, | |
| "grad_norm": 1.4878361225128174, | |
| "learning_rate": 1.97651016791325e-06, | |
| "epoch": 0.99, | |
| "step": 65950 | |
| }, | |
| { | |
| "loss": 1.0713, | |
| "grad_norm": 0.9274216294288635, | |
| "learning_rate": 1.9014147967919258e-06, | |
| "epoch": 0.99, | |
| "step": 65975 | |
| }, | |
| { | |
| "loss": 1.0495, | |
| "grad_norm": 1.1772902011871338, | |
| "learning_rate": 1.8263194256706017e-06, | |
| "epoch": 0.99, | |
| "step": 66000 | |
| }, | |
| { | |
| "loss": 1.1357, | |
| "grad_norm": 1.2464003562927246, | |
| "learning_rate": 1.7512240545492774e-06, | |
| "epoch": 0.99, | |
| "step": 66025 | |
| }, | |
| { | |
| "loss": 1.0778, | |
| "grad_norm": 1.813460350036621, | |
| "learning_rate": 1.6761286834279536e-06, | |
| "epoch": 0.99, | |
| "step": 66050 | |
| }, | |
| { | |
| "loss": 1.1034, | |
| "grad_norm": 1.6727650165557861, | |
| "learning_rate": 1.6010333123066297e-06, | |
| "epoch": 0.99, | |
| "step": 66075 | |
| }, | |
| { | |
| "loss": 1.1252, | |
| "grad_norm": 1.8909765481948853, | |
| "learning_rate": 1.5259379411853054e-06, | |
| "epoch": 0.99, | |
| "step": 66100 | |
| }, | |
| { | |
| "loss": 1.0249, | |
| "grad_norm": 1.8321037292480469, | |
| "learning_rate": 1.4508425700639813e-06, | |
| "epoch": 0.99, | |
| "step": 66125 | |
| }, | |
| { | |
| "loss": 1.0836, | |
| "grad_norm": 1.3860995769500732, | |
| "learning_rate": 1.3757471989426574e-06, | |
| "epoch": 0.99, | |
| "step": 66150 | |
| }, | |
| { | |
| "loss": 1.0984, | |
| "grad_norm": 1.2683864831924438, | |
| "learning_rate": 1.300651827821333e-06, | |
| "epoch": 0.99, | |
| "step": 66175 | |
| }, | |
| { | |
| "loss": 1.0977, | |
| "grad_norm": 2.86045503616333, | |
| "learning_rate": 1.225556456700009e-06, | |
| "epoch": 0.99, | |
| "step": 66200 | |
| }, | |
| { | |
| "loss": 1.138, | |
| "grad_norm": 1.2112616300582886, | |
| "learning_rate": 1.150461085578685e-06, | |
| "epoch": 0.99, | |
| "step": 66225 | |
| }, | |
| { | |
| "loss": 1.1231, | |
| "grad_norm": 1.550032615661621, | |
| "learning_rate": 1.0753657144573608e-06, | |
| "epoch": 0.99, | |
| "step": 66250 | |
| }, | |
| { | |
| "loss": 1.1238, | |
| "grad_norm": 1.13444185256958, | |
| "learning_rate": 1.0002703433360369e-06, | |
| "epoch": 1.0, | |
| "step": 66275 | |
| }, | |
| { | |
| "loss": 1.1818, | |
| "grad_norm": 2.8684732913970947, | |
| "learning_rate": 9.251749722147127e-07, | |
| "epoch": 1.0, | |
| "step": 66300 | |
| }, | |
| { | |
| "loss": 1.1398, | |
| "grad_norm": 1.3792351484298706, | |
| "learning_rate": 8.500796010933886e-07, | |
| "epoch": 1.0, | |
| "step": 66325 | |
| }, | |
| { | |
| "loss": 1.1148, | |
| "grad_norm": 1.5899792909622192, | |
| "learning_rate": 7.749842299720645e-07, | |
| "epoch": 1.0, | |
| "step": 66350 | |
| }, | |
| { | |
| "loss": 1.1708, | |
| "grad_norm": 2.143692970275879, | |
| "learning_rate": 6.998888588507405e-07, | |
| "epoch": 1.0, | |
| "step": 66375 | |
| }, | |
| { | |
| "loss": 1.1092, | |
| "grad_norm": 1.2674062252044678, | |
| "learning_rate": 6.247934877294164e-07, | |
| "epoch": 1.0, | |
| "step": 66400 | |
| }, | |
| { | |
| "loss": 1.0326, | |
| "grad_norm": 1.1335889101028442, | |
| "learning_rate": 5.496981166080923e-07, | |
| "epoch": 1.0, | |
| "step": 66425 | |
| }, | |
| { | |
| "loss": 1.0948, | |
| "grad_norm": 1.5896003246307373, | |
| "learning_rate": 4.7460274548676816e-07, | |
| "epoch": 1.0, | |
| "step": 66450 | |
| }, | |
| { | |
| "loss": 1.1036, | |
| "grad_norm": 1.4150667190551758, | |
| "learning_rate": 3.995073743654441e-07, | |
| "epoch": 1.0, | |
| "step": 66475 | |
| }, | |
| { | |
| "loss": 1.14, | |
| "grad_norm": 1.4912337064743042, | |
| "learning_rate": 3.244120032441201e-07, | |
| "epoch": 1.0, | |
| "step": 66500 | |
| }, | |
| { | |
| "loss": 1.0704, | |
| "grad_norm": 1.5823650360107422, | |
| "learning_rate": 2.493166321227959e-07, | |
| "epoch": 1.0, | |
| "step": 66525 | |
| }, | |
| { | |
| "loss": 1.1301, | |
| "grad_norm": 1.9806722402572632, | |
| "learning_rate": 1.7422126100147188e-07, | |
| "epoch": 1.0, | |
| "step": 66550 | |
| }, | |
| { | |
| "loss": 1.1368, | |
| "grad_norm": 1.6522107124328613, | |
| "learning_rate": 9.91258898801478e-08, | |
| "epoch": 1.0, | |
| "step": 66575 | |
| }, | |
| { | |
| "train_runtime": 164326.412, | |
| "train_samples_per_second": 0.81, | |
| "train_steps_per_second": 0.405, | |
| "total_flos": 7.363589651988972e+17, | |
| "train_loss": 1.1616554066605727, | |
| "epoch": 1.0, | |
| "step": 66583 | |
| } | |
| ] |