Text Classification
Transformers
Safetensors
English
llama4_text
text-generation
Generated from Trainer
sft
trl
Instructions to use dangvansam/MobileLLM-R1-140M-turn-detection-en with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dangvansam/MobileLLM-R1-140M-turn-detection-en with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="dangvansam/MobileLLM-R1-140M-turn-detection-en")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("dangvansam/MobileLLM-R1-140M-turn-detection-en")
model = AutoModelForCausalLM.from_pretrained("dangvansam/MobileLLM-R1-140M-turn-detection-en")
```

- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.2999670003299966, | |
| "eval_steps": 50000, | |
| "global_step": 150000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0021999780002199976, | |
| "grad_norm": 1.3319692611694336, | |
| "learning_rate": 9.9e-06, | |
| "loss": 0.5606, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.004399956000439995, | |
| "grad_norm": 1.4906107187271118, | |
| "learning_rate": 9.99564212611423e-06, | |
| "loss": 0.5478, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.006599934000659994, | |
| "grad_norm": 1.542100191116333, | |
| "learning_rate": 9.99124023330032e-06, | |
| "loss": 0.5508, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.00879991200087999, | |
| "grad_norm": 1.59752357006073, | |
| "learning_rate": 9.98683834048641e-06, | |
| "loss": 0.566, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.010999890001099988, | |
| "grad_norm": 1.535962462425232, | |
| "learning_rate": 9.9824364476725e-06, | |
| "loss": 0.5604, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.013199868001319988, | |
| "grad_norm": 1.6737797260284424, | |
| "learning_rate": 9.97803455485859e-06, | |
| "loss": 0.5651, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.015399846001539985, | |
| "grad_norm": 1.5698915719985962, | |
| "learning_rate": 9.97363266204468e-06, | |
| "loss": 0.5384, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01759982400175998, | |
| "grad_norm": 1.501681923866272, | |
| "learning_rate": 9.96923076923077e-06, | |
| "loss": 0.5454, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01979980200197998, | |
| "grad_norm": 1.6730457544326782, | |
| "learning_rate": 9.96482887641686e-06, | |
| "loss": 0.5515, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.021999780002199976, | |
| "grad_norm": 1.7415289878845215, | |
| "learning_rate": 9.960426983602949e-06, | |
| "loss": 0.5641, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.024199758002419976, | |
| "grad_norm": 1.7273190021514893, | |
| "learning_rate": 9.95602509078904e-06, | |
| "loss": 0.559, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.026399736002639975, | |
| "grad_norm": 1.7402335405349731, | |
| "learning_rate": 9.95162319797513e-06, | |
| "loss": 0.5391, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.02859971400285997, | |
| "grad_norm": 1.8390350341796875, | |
| "learning_rate": 9.94722130516122e-06, | |
| "loss": 0.5563, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.03079969200307997, | |
| "grad_norm": 1.3122905492782593, | |
| "learning_rate": 9.94281941234731e-06, | |
| "loss": 0.5594, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.032999670003299966, | |
| "grad_norm": 1.3811813592910767, | |
| "learning_rate": 9.9384175195334e-06, | |
| "loss": 0.5592, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.03519964800351996, | |
| "grad_norm": 1.8546792268753052, | |
| "learning_rate": 9.934015626719489e-06, | |
| "loss": 0.5522, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.037399626003739965, | |
| "grad_norm": 1.6485520601272583, | |
| "learning_rate": 9.92961373390558e-06, | |
| "loss": 0.5354, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.03959960400395996, | |
| "grad_norm": 1.366682767868042, | |
| "learning_rate": 9.92521184109167e-06, | |
| "loss": 0.5507, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.04179958200417996, | |
| "grad_norm": 1.7690378427505493, | |
| "learning_rate": 9.92080994827776e-06, | |
| "loss": 0.5444, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.04399956000439995, | |
| "grad_norm": 1.5437382459640503, | |
| "learning_rate": 9.91640805546385e-06, | |
| "loss": 0.5651, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.046199538004619956, | |
| "grad_norm": 1.156587839126587, | |
| "learning_rate": 9.91200616264994e-06, | |
| "loss": 0.562, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.04839951600483995, | |
| "grad_norm": 1.7941553592681885, | |
| "learning_rate": 9.90760426983603e-06, | |
| "loss": 0.5659, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.05059949400505995, | |
| "grad_norm": 1.4848283529281616, | |
| "learning_rate": 9.903202377022121e-06, | |
| "loss": 0.5629, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.05279947200527995, | |
| "grad_norm": 1.4486836194992065, | |
| "learning_rate": 9.898800484208211e-06, | |
| "loss": 0.5459, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.054999450005499946, | |
| "grad_norm": 1.731554388999939, | |
| "learning_rate": 9.894398591394302e-06, | |
| "loss": 0.5626, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.05719942800571994, | |
| "grad_norm": 1.6251667737960815, | |
| "learning_rate": 9.88999669858039e-06, | |
| "loss": 0.5516, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.05939940600593994, | |
| "grad_norm": 1.256371021270752, | |
| "learning_rate": 9.88559480576648e-06, | |
| "loss": 0.5459, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.06159938400615994, | |
| "grad_norm": 1.418700933456421, | |
| "learning_rate": 9.88119291295257e-06, | |
| "loss": 0.5495, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.06379936200637994, | |
| "grad_norm": 1.6376900672912598, | |
| "learning_rate": 9.876791020138661e-06, | |
| "loss": 0.5641, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.06599934000659993, | |
| "grad_norm": 1.5085667371749878, | |
| "learning_rate": 9.872389127324751e-06, | |
| "loss": 0.5625, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.06819931800681993, | |
| "grad_norm": 1.5381278991699219, | |
| "learning_rate": 9.86798723451084e-06, | |
| "loss": 0.5603, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.07039929600703992, | |
| "grad_norm": 1.5536515712738037, | |
| "learning_rate": 9.86358534169693e-06, | |
| "loss": 0.5529, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.07259927400725993, | |
| "grad_norm": 1.9047861099243164, | |
| "learning_rate": 9.85918344888302e-06, | |
| "loss": 0.549, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.07479925200747993, | |
| "grad_norm": 1.517338514328003, | |
| "learning_rate": 9.85478155606911e-06, | |
| "loss": 0.561, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.07699923000769993, | |
| "grad_norm": 1.5779054164886475, | |
| "learning_rate": 9.850379663255201e-06, | |
| "loss": 0.5706, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.07919920800791992, | |
| "grad_norm": 1.704124927520752, | |
| "learning_rate": 9.845977770441291e-06, | |
| "loss": 0.5523, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.08139918600813992, | |
| "grad_norm": 1.5121921300888062, | |
| "learning_rate": 9.84157587762738e-06, | |
| "loss": 0.5539, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.08359916400835991, | |
| "grad_norm": 1.6511967182159424, | |
| "learning_rate": 9.83717398481347e-06, | |
| "loss": 0.5443, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.08579914200857991, | |
| "grad_norm": 1.719138503074646, | |
| "learning_rate": 9.83277209199956e-06, | |
| "loss": 0.55, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.0879991200087999, | |
| "grad_norm": 1.6003084182739258, | |
| "learning_rate": 9.82837019918565e-06, | |
| "loss": 0.5588, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.09019909800901992, | |
| "grad_norm": 1.787855625152588, | |
| "learning_rate": 9.823968306371741e-06, | |
| "loss": 0.5636, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.09239907600923991, | |
| "grad_norm": 1.6582859754562378, | |
| "learning_rate": 9.819566413557831e-06, | |
| "loss": 0.5618, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.09459905400945991, | |
| "grad_norm": 1.696978211402893, | |
| "learning_rate": 9.81516452074392e-06, | |
| "loss": 0.5546, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.0967990320096799, | |
| "grad_norm": 1.8410296440124512, | |
| "learning_rate": 9.81076262793001e-06, | |
| "loss": 0.5471, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.0989990100098999, | |
| "grad_norm": 1.736607313156128, | |
| "learning_rate": 9.8063607351161e-06, | |
| "loss": 0.5461, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.1011989880101199, | |
| "grad_norm": 1.507016897201538, | |
| "learning_rate": 9.80195884230219e-06, | |
| "loss": 0.5609, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.10339896601033989, | |
| "grad_norm": 1.6941606998443604, | |
| "learning_rate": 9.797556949488281e-06, | |
| "loss": 0.5656, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.1055989440105599, | |
| "grad_norm": 1.6578975915908813, | |
| "learning_rate": 9.793155056674371e-06, | |
| "loss": 0.5624, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.1077989220107799, | |
| "grad_norm": 1.6376292705535889, | |
| "learning_rate": 9.78875316386046e-06, | |
| "loss": 0.5483, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.10999890001099989, | |
| "grad_norm": 1.8150690793991089, | |
| "learning_rate": 9.78435127104655e-06, | |
| "loss": 0.5739, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.11219887801121989, | |
| "grad_norm": 1.8733948469161987, | |
| "learning_rate": 9.77994937823264e-06, | |
| "loss": 0.5511, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.11439885601143988, | |
| "grad_norm": 1.3109201192855835, | |
| "learning_rate": 9.77554748541873e-06, | |
| "loss": 0.5584, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.11659883401165988, | |
| "grad_norm": 2.0025064945220947, | |
| "learning_rate": 9.771145592604821e-06, | |
| "loss": 0.5638, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.11879881201187988, | |
| "grad_norm": 1.584830641746521, | |
| "learning_rate": 9.76674369979091e-06, | |
| "loss": 0.575, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.12099879001209989, | |
| "grad_norm": 1.7688754796981812, | |
| "learning_rate": 9.762341806977e-06, | |
| "loss": 0.5603, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.12319876801231988, | |
| "grad_norm": 1.6688051223754883, | |
| "learning_rate": 9.75793991416309e-06, | |
| "loss": 0.5746, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.12539874601253986, | |
| "grad_norm": 1.6409167051315308, | |
| "learning_rate": 9.753538021349182e-06, | |
| "loss": 0.5469, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.12759872401275987, | |
| "grad_norm": 1.5867542028427124, | |
| "learning_rate": 9.74913612853527e-06, | |
| "loss": 0.5414, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.12979870201297988, | |
| "grad_norm": 1.7665027379989624, | |
| "learning_rate": 9.744734235721361e-06, | |
| "loss": 0.5574, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.13199868001319986, | |
| "grad_norm": 1.298757553100586, | |
| "learning_rate": 9.740332342907451e-06, | |
| "loss": 0.5356, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.13419865801341987, | |
| "grad_norm": 1.381654143333435, | |
| "learning_rate": 9.735930450093542e-06, | |
| "loss": 0.5525, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.13639863601363986, | |
| "grad_norm": 1.398958683013916, | |
| "learning_rate": 9.731528557279632e-06, | |
| "loss": 0.5427, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.13859861401385987, | |
| "grad_norm": 1.4779409170150757, | |
| "learning_rate": 9.727126664465722e-06, | |
| "loss": 0.5583, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.14079859201407985, | |
| "grad_norm": 1.5421425104141235, | |
| "learning_rate": 9.72272477165181e-06, | |
| "loss": 0.5484, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.14299857001429986, | |
| "grad_norm": 1.7208441495895386, | |
| "learning_rate": 9.718322878837901e-06, | |
| "loss": 0.5478, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.14519854801451987, | |
| "grad_norm": 1.643373727798462, | |
| "learning_rate": 9.713920986023991e-06, | |
| "loss": 0.5742, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.14739852601473985, | |
| "grad_norm": 1.5801072120666504, | |
| "learning_rate": 9.709519093210082e-06, | |
| "loss": 0.5516, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.14959850401495986, | |
| "grad_norm": 1.5034841299057007, | |
| "learning_rate": 9.705117200396172e-06, | |
| "loss": 0.558, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.15179848201517984, | |
| "grad_norm": 1.6282888650894165, | |
| "learning_rate": 9.70071530758226e-06, | |
| "loss": 0.5575, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.15399846001539985, | |
| "grad_norm": 1.4846858978271484, | |
| "learning_rate": 9.69631341476835e-06, | |
| "loss": 0.5487, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.15619843801561983, | |
| "grad_norm": 1.6254215240478516, | |
| "learning_rate": 9.691911521954441e-06, | |
| "loss": 0.5443, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.15839841601583984, | |
| "grad_norm": 1.7018550634384155, | |
| "learning_rate": 9.687509629140531e-06, | |
| "loss": 0.556, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.16059839401605983, | |
| "grad_norm": 1.6466326713562012, | |
| "learning_rate": 9.683107736326622e-06, | |
| "loss": 0.5541, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.16279837201627984, | |
| "grad_norm": 1.4446876049041748, | |
| "learning_rate": 9.678705843512712e-06, | |
| "loss": 0.5464, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.16499835001649985, | |
| "grad_norm": 1.5896605253219604, | |
| "learning_rate": 9.6743039506988e-06, | |
| "loss": 0.5394, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.16719832801671983, | |
| "grad_norm": 1.837875485420227, | |
| "learning_rate": 9.66990205788489e-06, | |
| "loss": 0.5351, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.16939830601693984, | |
| "grad_norm": 1.5089105367660522, | |
| "learning_rate": 9.665500165070981e-06, | |
| "loss": 0.5434, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.17159828401715982, | |
| "grad_norm": 1.5068552494049072, | |
| "learning_rate": 9.661098272257071e-06, | |
| "loss": 0.5542, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.17379826201737983, | |
| "grad_norm": 1.7671160697937012, | |
| "learning_rate": 9.656696379443162e-06, | |
| "loss": 0.5434, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.1759982400175998, | |
| "grad_norm": 1.612404227256775, | |
| "learning_rate": 9.652294486629252e-06, | |
| "loss": 0.5481, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.17819821801781982, | |
| "grad_norm": 1.403520941734314, | |
| "learning_rate": 9.64789259381534e-06, | |
| "loss": 0.5436, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.18039819601803983, | |
| "grad_norm": 1.786060094833374, | |
| "learning_rate": 9.64349070100143e-06, | |
| "loss": 0.5571, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.1825981740182598, | |
| "grad_norm": 1.6619782447814941, | |
| "learning_rate": 9.639088808187521e-06, | |
| "loss": 0.5402, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.18479815201847982, | |
| "grad_norm": 1.805365800857544, | |
| "learning_rate": 9.634686915373611e-06, | |
| "loss": 0.5705, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.1869981300186998, | |
| "grad_norm": 1.5753322839736938, | |
| "learning_rate": 9.630285022559702e-06, | |
| "loss": 0.5477, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.18919810801891981, | |
| "grad_norm": 1.688490629196167, | |
| "learning_rate": 9.625883129745792e-06, | |
| "loss": 0.5497, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.1913980860191398, | |
| "grad_norm": 1.5862349271774292, | |
| "learning_rate": 9.62148123693188e-06, | |
| "loss": 0.5374, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.1935980640193598, | |
| "grad_norm": 1.8771247863769531, | |
| "learning_rate": 9.61707934411797e-06, | |
| "loss": 0.5445, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.19579804201957982, | |
| "grad_norm": 1.432055115699768, | |
| "learning_rate": 9.612677451304061e-06, | |
| "loss": 0.5478, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.1979980200197998, | |
| "grad_norm": 1.7091459035873413, | |
| "learning_rate": 9.608275558490151e-06, | |
| "loss": 0.5509, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.2001979980200198, | |
| "grad_norm": 1.5979877710342407, | |
| "learning_rate": 9.603873665676242e-06, | |
| "loss": 0.5439, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.2023979760202398, | |
| "grad_norm": 1.5256608724594116, | |
| "learning_rate": 9.599471772862332e-06, | |
| "loss": 0.546, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.2045979540204598, | |
| "grad_norm": 1.7038841247558594, | |
| "learning_rate": 9.595069880048422e-06, | |
| "loss": 0.5455, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.20679793202067978, | |
| "grad_norm": 1.6116039752960205, | |
| "learning_rate": 9.590667987234512e-06, | |
| "loss": 0.5448, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.2089979100208998, | |
| "grad_norm": 1.6021257638931274, | |
| "learning_rate": 9.586266094420603e-06, | |
| "loss": 0.5373, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.2111978880211198, | |
| "grad_norm": 1.8599495887756348, | |
| "learning_rate": 9.581864201606691e-06, | |
| "loss": 0.5445, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.21339786602133978, | |
| "grad_norm": 1.5737359523773193, | |
| "learning_rate": 9.577462308792782e-06, | |
| "loss": 0.554, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.2155978440215598, | |
| "grad_norm": 1.9932422637939453, | |
| "learning_rate": 9.573060415978872e-06, | |
| "loss": 0.5466, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.21779782202177977, | |
| "grad_norm": 1.2846128940582275, | |
| "learning_rate": 9.568658523164962e-06, | |
| "loss": 0.552, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.21999780002199978, | |
| "grad_norm": 1.845566987991333, | |
| "learning_rate": 9.564256630351052e-06, | |
| "loss": 0.5351, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.22219777802221977, | |
| "grad_norm": 1.7098534107208252, | |
| "learning_rate": 9.559854737537143e-06, | |
| "loss": 0.5701, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.22439775602243978, | |
| "grad_norm": 1.6359370946884155, | |
| "learning_rate": 9.555452844723231e-06, | |
| "loss": 0.5399, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.22659773402265979, | |
| "grad_norm": 1.8628222942352295, | |
| "learning_rate": 9.551050951909322e-06, | |
| "loss": 0.5428, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.22879771202287977, | |
| "grad_norm": 1.7202619314193726, | |
| "learning_rate": 9.546649059095412e-06, | |
| "loss": 0.5473, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.23099769002309978, | |
| "grad_norm": 1.6408450603485107, | |
| "learning_rate": 9.542247166281502e-06, | |
| "loss": 0.5566, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.23319766802331976, | |
| "grad_norm": 1.6586904525756836, | |
| "learning_rate": 9.537845273467592e-06, | |
| "loss": 0.5357, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.23539764602353977, | |
| "grad_norm": 1.8505043983459473, | |
| "learning_rate": 9.533443380653683e-06, | |
| "loss": 0.5596, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.23759762402375975, | |
| "grad_norm": 1.9244803190231323, | |
| "learning_rate": 9.529041487839771e-06, | |
| "loss": 0.5428, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.23979760202397976, | |
| "grad_norm": 1.5375540256500244, | |
| "learning_rate": 9.524639595025862e-06, | |
| "loss": 0.5478, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.24199758002419977, | |
| "grad_norm": 1.7372453212738037, | |
| "learning_rate": 9.520237702211952e-06, | |
| "loss": 0.5458, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.24419755802441975, | |
| "grad_norm": 1.5542049407958984, | |
| "learning_rate": 9.515835809398042e-06, | |
| "loss": 0.5412, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.24639753602463976, | |
| "grad_norm": 1.5235602855682373, | |
| "learning_rate": 9.511433916584132e-06, | |
| "loss": 0.5631, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.24859751402485974, | |
| "grad_norm": 1.7347521781921387, | |
| "learning_rate": 9.507032023770221e-06, | |
| "loss": 0.5508, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.2507974920250797, | |
| "grad_norm": 1.8189500570297241, | |
| "learning_rate": 9.502630130956311e-06, | |
| "loss": 0.5346, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.25299747002529976, | |
| "grad_norm": 1.5607105493545532, | |
| "learning_rate": 9.498228238142402e-06, | |
| "loss": 0.5454, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.25519744802551975, | |
| "grad_norm": 1.5799516439437866, | |
| "learning_rate": 9.493826345328492e-06, | |
| "loss": 0.5271, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.25739742602573973, | |
| "grad_norm": 1.4460997581481934, | |
| "learning_rate": 9.489424452514582e-06, | |
| "loss": 0.5437, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.25959740402595977, | |
| "grad_norm": 1.368635892868042, | |
| "learning_rate": 9.485022559700672e-06, | |
| "loss": 0.5442, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.26179738202617975, | |
| "grad_norm": 1.8246245384216309, | |
| "learning_rate": 9.480620666886761e-06, | |
| "loss": 0.5321, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.26399736002639973, | |
| "grad_norm": 1.8881937265396118, | |
| "learning_rate": 9.476218774072851e-06, | |
| "loss": 0.5639, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.2661973380266197, | |
| "grad_norm": 1.39218008518219, | |
| "learning_rate": 9.471816881258942e-06, | |
| "loss": 0.5634, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.26839731602683975, | |
| "grad_norm": 1.5577659606933594, | |
| "learning_rate": 9.467414988445032e-06, | |
| "loss": 0.5422, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.27059729402705973, | |
| "grad_norm": 1.9022492170333862, | |
| "learning_rate": 9.463013095631122e-06, | |
| "loss": 0.5429, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.2727972720272797, | |
| "grad_norm": 1.7101701498031616, | |
| "learning_rate": 9.458611202817212e-06, | |
| "loss": 0.5473, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.27499725002749975, | |
| "grad_norm": 2.0155210494995117, | |
| "learning_rate": 9.454209310003301e-06, | |
| "loss": 0.5689, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.27719722802771973, | |
| "grad_norm": 1.994775414466858, | |
| "learning_rate": 9.449807417189393e-06, | |
| "loss": 0.514, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.2793972060279397, | |
| "grad_norm": 1.5826818943023682, | |
| "learning_rate": 9.445405524375483e-06, | |
| "loss": 0.5413, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.2815971840281597, | |
| "grad_norm": 1.589729905128479, | |
| "learning_rate": 9.441003631561574e-06, | |
| "loss": 0.5339, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.28379716202837973, | |
| "grad_norm": 1.8156132698059082, | |
| "learning_rate": 9.436601738747662e-06, | |
| "loss": 0.5546, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.2859971400285997, | |
| "grad_norm": 1.576416254043579, | |
| "learning_rate": 9.432199845933752e-06, | |
| "loss": 0.5465, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.2881971180288197, | |
| "grad_norm": 1.9609074592590332, | |
| "learning_rate": 9.427797953119843e-06, | |
| "loss": 0.553, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.29039709602903974, | |
| "grad_norm": 1.5881434679031372, | |
| "learning_rate": 9.423396060305933e-06, | |
| "loss": 0.5377, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.2925970740292597, | |
| "grad_norm": 1.569200038909912, | |
| "learning_rate": 9.418994167492023e-06, | |
| "loss": 0.5467, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.2947970520294797, | |
| "grad_norm": 1.7305947542190552, | |
| "learning_rate": 9.414592274678112e-06, | |
| "loss": 0.5388, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.2969970300296997, | |
| "grad_norm": 1.9278624057769775, | |
| "learning_rate": 9.410190381864202e-06, | |
| "loss": 0.5419, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.2991970080299197, | |
| "grad_norm": 1.6430861949920654, | |
| "learning_rate": 9.405788489050292e-06, | |
| "loss": 0.5579, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.3013969860301397, | |
| "grad_norm": 1.4233689308166504, | |
| "learning_rate": 9.401386596236383e-06, | |
| "loss": 0.5385, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.3035969640303597, | |
| "grad_norm": 1.705346941947937, | |
| "learning_rate": 9.396984703422473e-06, | |
| "loss": 0.5491, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.3057969420305797, | |
| "grad_norm": 1.7933902740478516, | |
| "learning_rate": 9.392582810608563e-06, | |
| "loss": 0.5513, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.3079969200307997, | |
| "grad_norm": 1.901663899421692, | |
| "learning_rate": 9.388180917794652e-06, | |
| "loss": 0.5614, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.3101968980310197, | |
| "grad_norm": 1.6877708435058594, | |
| "learning_rate": 9.383779024980742e-06, | |
| "loss": 0.5334, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.31239687603123967, | |
| "grad_norm": 1.7979609966278076, | |
| "learning_rate": 9.379377132166832e-06, | |
| "loss": 0.5527, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.3145968540314597, | |
| "grad_norm": 1.7708429098129272, | |
| "learning_rate": 9.374975239352923e-06, | |
| "loss": 0.5386, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.3167968320316797, | |
| "grad_norm": 1.3621147871017456, | |
| "learning_rate": 9.370573346539013e-06, | |
| "loss": 0.5626, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.31899681003189967, | |
| "grad_norm": 1.5842787027359009, | |
| "learning_rate": 9.366171453725103e-06, | |
| "loss": 0.529, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.32119678803211965, | |
| "grad_norm": 1.817987084388733, | |
| "learning_rate": 9.361769560911192e-06, | |
| "loss": 0.538, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.3233967660323397, | |
| "grad_norm": 1.6293082237243652, | |
| "learning_rate": 9.357367668097282e-06, | |
| "loss": 0.5481, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.32559674403255967, | |
| "grad_norm": 1.5916519165039062, | |
| "learning_rate": 9.352965775283372e-06, | |
| "loss": 0.5534, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.32779672203277965, | |
| "grad_norm": 1.5773463249206543, | |
| "learning_rate": 9.348563882469463e-06, | |
| "loss": 0.5501, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.3299967000329997, | |
| "grad_norm": 1.9787790775299072, | |
| "learning_rate": 9.344161989655553e-06, | |
| "loss": 0.541, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.3321966780332197, | |
| "grad_norm": 1.3281339406967163, | |
| "learning_rate": 9.339760096841642e-06, | |
| "loss": 0.539, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.33439665603343965, | |
| "grad_norm": 2.091588020324707, | |
| "learning_rate": 9.335358204027732e-06, | |
| "loss": 0.5393, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.33659663403365964, | |
| "grad_norm": 1.912660837173462, | |
| "learning_rate": 9.330956311213822e-06, | |
| "loss": 0.5168, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.3387966120338797, | |
| "grad_norm": 1.7248882055282593, | |
| "learning_rate": 9.326554418399912e-06, | |
| "loss": 0.538, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.34099659003409966, | |
| "grad_norm": 1.8949754238128662, | |
| "learning_rate": 9.322152525586003e-06, | |
| "loss": 0.5444, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.34319656803431964, | |
| "grad_norm": 1.4323865175247192, | |
| "learning_rate": 9.317750632772093e-06, | |
| "loss": 0.542, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.3453965460345397, | |
| "grad_norm": 1.7454142570495605, | |
| "learning_rate": 9.313348739958182e-06, | |
| "loss": 0.5346, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.34759652403475966, | |
| "grad_norm": 2.214750289916992, | |
| "learning_rate": 9.308946847144272e-06, | |
| "loss": 0.5391, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.34979650203497964, | |
| "grad_norm": 1.7991106510162354, | |
| "learning_rate": 9.304544954330362e-06, | |
| "loss": 0.551, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.3519964800351996, | |
| "grad_norm": 1.7487062215805054, | |
| "learning_rate": 9.300143061516452e-06, | |
| "loss": 0.5536, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.35419645803541966, | |
| "grad_norm": 1.7137202024459839, | |
| "learning_rate": 9.295741168702543e-06, | |
| "loss": 0.5472, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.35639643603563964, | |
| "grad_norm": 1.569287657737732, | |
| "learning_rate": 9.291339275888633e-06, | |
| "loss": 0.5286, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.3585964140358596, | |
| "grad_norm": 1.805232286453247, | |
| "learning_rate": 9.286937383074723e-06, | |
| "loss": 0.535, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.36079639203607966, | |
| "grad_norm": 1.8445895910263062, | |
| "learning_rate": 9.282535490260814e-06, | |
| "loss": 0.5297, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.36299637003629964, | |
| "grad_norm": 1.8282471895217896, | |
| "learning_rate": 9.278133597446904e-06, | |
| "loss": 0.5341, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.3651963480365196, | |
| "grad_norm": 1.5979552268981934, | |
| "learning_rate": 9.273731704632994e-06, | |
| "loss": 0.5471, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.3673963260367396, | |
| "grad_norm": 1.6148823499679565, | |
| "learning_rate": 9.269329811819083e-06, | |
| "loss": 0.534, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.36959630403695964, | |
| "grad_norm": 1.7306467294692993, | |
| "learning_rate": 9.264927919005173e-06, | |
| "loss": 0.5475, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.3717962820371796, | |
| "grad_norm": 1.5774517059326172, | |
| "learning_rate": 9.260526026191263e-06, | |
| "loss": 0.5604, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.3739962600373996, | |
| "grad_norm": 1.6581697463989258, | |
| "learning_rate": 9.256124133377354e-06, | |
| "loss": 0.5474, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.37619623803761965, | |
| "grad_norm": 1.8324202299118042, | |
| "learning_rate": 9.251722240563444e-06, | |
| "loss": 0.5341, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.37839621603783963, | |
| "grad_norm": 1.7121940851211548, | |
| "learning_rate": 9.247320347749532e-06, | |
| "loss": 0.5538, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.3805961940380596, | |
| "grad_norm": 1.8483502864837646, | |
| "learning_rate": 9.242918454935623e-06, | |
| "loss": 0.5231, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.3827961720382796, | |
| "grad_norm": 1.7600507736206055, | |
| "learning_rate": 9.238516562121713e-06, | |
| "loss": 0.5581, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.38499615003849963, | |
| "grad_norm": 1.779398798942566, | |
| "learning_rate": 9.234114669307803e-06, | |
| "loss": 0.5468, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.3871961280387196, | |
| "grad_norm": 1.7732363939285278, | |
| "learning_rate": 9.229712776493894e-06, | |
| "loss": 0.558, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.3893961060389396, | |
| "grad_norm": 1.7597503662109375, | |
| "learning_rate": 9.225310883679984e-06, | |
| "loss": 0.5231, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.39159608403915963, | |
| "grad_norm": 1.8344216346740723, | |
| "learning_rate": 9.220908990866072e-06, | |
| "loss": 0.5428, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.3937960620393796, | |
| "grad_norm": 1.662919044494629, | |
| "learning_rate": 9.216507098052163e-06, | |
| "loss": 0.5314, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.3959960400395996, | |
| "grad_norm": 1.3180632591247559, | |
| "learning_rate": 9.212105205238253e-06, | |
| "loss": 0.5335, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.3981960180398196, | |
| "grad_norm": 1.8466808795928955, | |
| "learning_rate": 9.207703312424343e-06, | |
| "loss": 0.5251, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.4003959960400396, | |
| "grad_norm": 1.942530632019043, | |
| "learning_rate": 9.203301419610434e-06, | |
| "loss": 0.5361, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.4025959740402596, | |
| "grad_norm": 1.6795586347579956, | |
| "learning_rate": 9.198899526796524e-06, | |
| "loss": 0.5322, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.4047959520404796, | |
| "grad_norm": 1.8028258085250854, | |
| "learning_rate": 9.194497633982612e-06, | |
| "loss": 0.5332, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.4069959300406996, | |
| "grad_norm": 1.9072916507720947, | |
| "learning_rate": 9.190095741168703e-06, | |
| "loss": 0.5436, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.4091959080409196, | |
| "grad_norm": 1.849950909614563, | |
| "learning_rate": 9.185693848354793e-06, | |
| "loss": 0.5464, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.4113958860411396, | |
| "grad_norm": 1.8676297664642334, | |
| "learning_rate": 9.181291955540883e-06, | |
| "loss": 0.5598, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.41359586404135956, | |
| "grad_norm": 1.8260865211486816, | |
| "learning_rate": 9.176890062726974e-06, | |
| "loss": 0.5433, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.4157958420415796, | |
| "grad_norm": 1.6370753049850464, | |
| "learning_rate": 9.172488169913064e-06, | |
| "loss": 0.5473, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.4179958200417996, | |
| "grad_norm": 1.583030104637146, | |
| "learning_rate": 9.168086277099152e-06, | |
| "loss": 0.5478, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.42019579804201956, | |
| "grad_norm": 1.895065188407898, | |
| "learning_rate": 9.163684384285243e-06, | |
| "loss": 0.5391, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.4223957760422396, | |
| "grad_norm": 1.6694116592407227, | |
| "learning_rate": 9.159282491471333e-06, | |
| "loss": 0.5206, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.4245957540424596, | |
| "grad_norm": 1.630575180053711, | |
| "learning_rate": 9.154880598657423e-06, | |
| "loss": 0.5451, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.42679573204267957, | |
| "grad_norm": 2.0224249362945557, | |
| "learning_rate": 9.150478705843514e-06, | |
| "loss": 0.5334, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.42899571004289955, | |
| "grad_norm": 1.6329941749572754, | |
| "learning_rate": 9.146076813029602e-06, | |
| "loss": 0.5279, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.4311956880431196, | |
| "grad_norm": 1.3999661207199097, | |
| "learning_rate": 9.141674920215694e-06, | |
| "loss": 0.5366, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.43339566604333957, | |
| "grad_norm": 1.5041108131408691, | |
| "learning_rate": 9.137273027401784e-06, | |
| "loss": 0.5324, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.43559564404355955, | |
| "grad_norm": 1.714513897895813, | |
| "learning_rate": 9.132871134587875e-06, | |
| "loss": 0.5341, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.4377956220437796, | |
| "grad_norm": 1.7554248571395874, | |
| "learning_rate": 9.128469241773963e-06, | |
| "loss": 0.5436, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.43999560004399957, | |
| "grad_norm": 1.665436029434204, | |
| "learning_rate": 9.124067348960054e-06, | |
| "loss": 0.5299, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.44219557804421955, | |
| "grad_norm": 1.668437123298645, | |
| "learning_rate": 9.119665456146144e-06, | |
| "loss": 0.5188, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.44439555604443953, | |
| "grad_norm": 1.9339295625686646, | |
| "learning_rate": 9.115263563332234e-06, | |
| "loss": 0.5574, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.44659553404465957, | |
| "grad_norm": 1.7263190746307373, | |
| "learning_rate": 9.110861670518324e-06, | |
| "loss": 0.5469, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.44879551204487955, | |
| "grad_norm": 1.5733555555343628, | |
| "learning_rate": 9.106459777704415e-06, | |
| "loss": 0.529, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.45099549004509953, | |
| "grad_norm": 1.6786284446716309, | |
| "learning_rate": 9.102057884890503e-06, | |
| "loss": 0.539, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.45319546804531957, | |
| "grad_norm": 1.6025316715240479, | |
| "learning_rate": 9.097655992076594e-06, | |
| "loss": 0.5394, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.45539544604553955, | |
| "grad_norm": 1.7945187091827393, | |
| "learning_rate": 9.093254099262684e-06, | |
| "loss": 0.5233, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.45759542404575954, | |
| "grad_norm": 1.6407737731933594, | |
| "learning_rate": 9.088852206448774e-06, | |
| "loss": 0.547, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.4597954020459795, | |
| "grad_norm": 1.623547911643982, | |
| "learning_rate": 9.084450313634864e-06, | |
| "loss": 0.5609, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.46199538004619956, | |
| "grad_norm": 1.7454668283462524, | |
| "learning_rate": 9.080048420820953e-06, | |
| "loss": 0.5484, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.46419535804641954, | |
| "grad_norm": 2.0362443923950195, | |
| "learning_rate": 9.075646528007043e-06, | |
| "loss": 0.5199, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.4663953360466395, | |
| "grad_norm": 1.8968782424926758, | |
| "learning_rate": 9.071244635193134e-06, | |
| "loss": 0.5471, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.46859531404685956, | |
| "grad_norm": 1.7040385007858276, | |
| "learning_rate": 9.066842742379224e-06, | |
| "loss": 0.5167, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.47079529204707954, | |
| "grad_norm": 1.8420989513397217, | |
| "learning_rate": 9.062440849565314e-06, | |
| "loss": 0.5359, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.4729952700472995, | |
| "grad_norm": 1.6311464309692383, | |
| "learning_rate": 9.058038956751404e-06, | |
| "loss": 0.5375, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.4751952480475195, | |
| "grad_norm": 2.0437209606170654, | |
| "learning_rate": 9.053637063937493e-06, | |
| "loss": 0.5427, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.47739522604773954, | |
| "grad_norm": 1.6111825704574585, | |
| "learning_rate": 9.049235171123583e-06, | |
| "loss": 0.526, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.4795952040479595, | |
| "grad_norm": 1.3677709102630615, | |
| "learning_rate": 9.044833278309674e-06, | |
| "loss": 0.5328, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.4817951820481795, | |
| "grad_norm": 2.1056365966796875, | |
| "learning_rate": 9.040431385495764e-06, | |
| "loss": 0.5391, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.48399516004839954, | |
| "grad_norm": 1.807760238647461, | |
| "learning_rate": 9.036029492681854e-06, | |
| "loss": 0.5606, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.4861951380486195, | |
| "grad_norm": 1.8556056022644043, | |
| "learning_rate": 9.031627599867944e-06, | |
| "loss": 0.5351, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.4883951160488395, | |
| "grad_norm": 2.0106847286224365, | |
| "learning_rate": 9.027225707054033e-06, | |
| "loss": 0.5542, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.4905950940490595, | |
| "grad_norm": 1.6676563024520874, | |
| "learning_rate": 9.022823814240123e-06, | |
| "loss": 0.538, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.4927950720492795, | |
| "grad_norm": 1.4103186130523682, | |
| "learning_rate": 9.018421921426214e-06, | |
| "loss": 0.5241, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.4949950500494995, | |
| "grad_norm": 1.8032267093658447, | |
| "learning_rate": 9.014020028612304e-06, | |
| "loss": 0.5367, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.4971950280497195, | |
| "grad_norm": 1.6195557117462158, | |
| "learning_rate": 9.009618135798394e-06, | |
| "loss": 0.5434, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.4993950060499395, | |
| "grad_norm": 1.7808386087417603, | |
| "learning_rate": 9.005216242984484e-06, | |
| "loss": 0.5421, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.5015949840501595, | |
| "grad_norm": 1.746341586112976, | |
| "learning_rate": 9.000814350170573e-06, | |
| "loss": 0.5362, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.5037949620503795, | |
| "grad_norm": 2.1744487285614014, | |
| "learning_rate": 8.996412457356663e-06, | |
| "loss": 0.5243, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.5059949400505995, | |
| "grad_norm": 1.7973219156265259, | |
| "learning_rate": 8.992010564542755e-06, | |
| "loss": 0.5504, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.5081949180508195, | |
| "grad_norm": 1.6203027963638306, | |
| "learning_rate": 8.987608671728844e-06, | |
| "loss": 0.5426, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.5103948960510395, | |
| "grad_norm": 1.6453986167907715, | |
| "learning_rate": 8.983206778914934e-06, | |
| "loss": 0.548, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.5125948740512595, | |
| "grad_norm": 1.8163201808929443, | |
| "learning_rate": 8.978804886101024e-06, | |
| "loss": 0.5306, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.5147948520514795, | |
| "grad_norm": 1.7606194019317627, | |
| "learning_rate": 8.974402993287115e-06, | |
| "loss": 0.5318, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.5169948300516994, | |
| "grad_norm": 1.9621275663375854, | |
| "learning_rate": 8.970001100473205e-06, | |
| "loss": 0.5289, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.5191948080519195, | |
| "grad_norm": 1.707217812538147, | |
| "learning_rate": 8.965599207659295e-06, | |
| "loss": 0.5374, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.5213947860521395, | |
| "grad_norm": 1.9041409492492676, | |
| "learning_rate": 8.961197314845384e-06, | |
| "loss": 0.5512, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.5235947640523595, | |
| "grad_norm": 1.7021831274032593, | |
| "learning_rate": 8.956795422031474e-06, | |
| "loss": 0.5363, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.5257947420525795, | |
| "grad_norm": 1.6546313762664795, | |
| "learning_rate": 8.952393529217564e-06, | |
| "loss": 0.5355, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.5279947200527995, | |
| "grad_norm": 2.1298437118530273, | |
| "learning_rate": 8.947991636403655e-06, | |
| "loss": 0.5336, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.5301946980530194, | |
| "grad_norm": 1.6351710557937622, | |
| "learning_rate": 8.943589743589745e-06, | |
| "loss": 0.5298, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.5323946760532394, | |
| "grad_norm": 1.7850167751312256, | |
| "learning_rate": 8.939187850775835e-06, | |
| "loss": 0.5295, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.5345946540534595, | |
| "grad_norm": 1.6639127731323242, | |
| "learning_rate": 8.934785957961924e-06, | |
| "loss": 0.5482, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.5367946320536795, | |
| "grad_norm": 1.6761794090270996, | |
| "learning_rate": 8.930384065148014e-06, | |
| "loss": 0.5398, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.5389946100538995, | |
| "grad_norm": 2.0362918376922607, | |
| "learning_rate": 8.925982172334104e-06, | |
| "loss": 0.5387, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.5411945880541195, | |
| "grad_norm": 1.5029228925704956, | |
| "learning_rate": 8.921580279520195e-06, | |
| "loss": 0.5296, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.5433945660543394, | |
| "grad_norm": 1.7153294086456299, | |
| "learning_rate": 8.917178386706285e-06, | |
| "loss": 0.5395, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.5455945440545594, | |
| "grad_norm": 1.6009351015090942, | |
| "learning_rate": 8.912776493892375e-06, | |
| "loss": 0.5301, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.5477945220547794, | |
| "grad_norm": 1.7909400463104248, | |
| "learning_rate": 8.908374601078464e-06, | |
| "loss": 0.5292, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.5499945000549995, | |
| "grad_norm": 2.1847472190856934, | |
| "learning_rate": 8.903972708264554e-06, | |
| "loss": 0.5326, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.5521944780552195, | |
| "grad_norm": 2.270923614501953, | |
| "learning_rate": 8.899570815450644e-06, | |
| "loss": 0.545, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.5543944560554395, | |
| "grad_norm": 2.044668436050415, | |
| "learning_rate": 8.895168922636735e-06, | |
| "loss": 0.5335, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.5565944340556594, | |
| "grad_norm": 1.9989433288574219, | |
| "learning_rate": 8.890767029822825e-06, | |
| "loss": 0.5516, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.5587944120558794, | |
| "grad_norm": 1.7529683113098145, | |
| "learning_rate": 8.886365137008914e-06, | |
| "loss": 0.5379, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.5609943900560994, | |
| "grad_norm": 1.4954921007156372, | |
| "learning_rate": 8.881963244195004e-06, | |
| "loss": 0.5346, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.5631943680563194, | |
| "grad_norm": 1.7510510683059692, | |
| "learning_rate": 8.877561351381094e-06, | |
| "loss": 0.5186, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.5653943460565395, | |
| "grad_norm": 1.8264451026916504, | |
| "learning_rate": 8.873159458567184e-06, | |
| "loss": 0.5419, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.5675943240567595, | |
| "grad_norm": 2.1004931926727295, | |
| "learning_rate": 8.868757565753275e-06, | |
| "loss": 0.5419, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.5697943020569795, | |
| "grad_norm": 1.9316984415054321, | |
| "learning_rate": 8.864355672939365e-06, | |
| "loss": 0.5209, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.5719942800571994, | |
| "grad_norm": 2.182731866836548, | |
| "learning_rate": 8.859953780125454e-06, | |
| "loss": 0.5356, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.5741942580574194, | |
| "grad_norm": 1.6151630878448486, | |
| "learning_rate": 8.855551887311544e-06, | |
| "loss": 0.5419, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.5763942360576394, | |
| "grad_norm": 1.8083909749984741, | |
| "learning_rate": 8.851149994497634e-06, | |
| "loss": 0.5218, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.5785942140578594, | |
| "grad_norm": 1.6356123685836792, | |
| "learning_rate": 8.846748101683724e-06, | |
| "loss": 0.5256, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.5807941920580795, | |
| "grad_norm": 2.2701175212860107, | |
| "learning_rate": 8.842346208869815e-06, | |
| "loss": 0.534, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.5829941700582995, | |
| "grad_norm": 1.9146398305892944, | |
| "learning_rate": 8.837944316055905e-06, | |
| "loss": 0.5399, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.5851941480585194, | |
| "grad_norm": 1.9954113960266113, | |
| "learning_rate": 8.833542423241995e-06, | |
| "loss": 0.537, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.5873941260587394, | |
| "grad_norm": 1.6357481479644775, | |
| "learning_rate": 8.829140530428086e-06, | |
| "loss": 0.5322, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.5895941040589594, | |
| "grad_norm": 1.7142163515090942, | |
| "learning_rate": 8.824738637614176e-06, | |
| "loss": 0.5475, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.5917940820591794, | |
| "grad_norm": 1.7539161443710327, | |
| "learning_rate": 8.820336744800266e-06, | |
| "loss": 0.523, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.5939940600593994, | |
| "grad_norm": 1.6141777038574219, | |
| "learning_rate": 8.815934851986355e-06, | |
| "loss": 0.5318, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.5961940380596195, | |
| "grad_norm": 2.0629382133483887, | |
| "learning_rate": 8.811532959172445e-06, | |
| "loss": 0.5334, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.5983940160598394, | |
| "grad_norm": 1.999254584312439, | |
| "learning_rate": 8.807131066358535e-06, | |
| "loss": 0.5504, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.6005939940600594, | |
| "grad_norm": 1.8531382083892822, | |
| "learning_rate": 8.802729173544626e-06, | |
| "loss": 0.5376, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.6027939720602794, | |
| "grad_norm": 1.4768983125686646, | |
| "learning_rate": 8.798327280730716e-06, | |
| "loss": 0.5344, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.6049939500604994, | |
| "grad_norm": 1.7571672201156616, | |
| "learning_rate": 8.793925387916804e-06, | |
| "loss": 0.5342, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.6071939280607194, | |
| "grad_norm": 1.7986180782318115, | |
| "learning_rate": 8.789523495102895e-06, | |
| "loss": 0.5474, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.6093939060609393, | |
| "grad_norm": 1.9569381475448608, | |
| "learning_rate": 8.785121602288985e-06, | |
| "loss": 0.5403, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.6115938840611594, | |
| "grad_norm": 2.1773102283477783, | |
| "learning_rate": 8.780719709475075e-06, | |
| "loss": 0.5239, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.6137938620613794, | |
| "grad_norm": 2.050550937652588, | |
| "learning_rate": 8.776317816661166e-06, | |
| "loss": 0.5253, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.6159938400615994, | |
| "grad_norm": 1.7763617038726807, | |
| "learning_rate": 8.771915923847256e-06, | |
| "loss": 0.5283, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.6181938180618194, | |
| "grad_norm": 1.6701637506484985, | |
| "learning_rate": 8.767514031033344e-06, | |
| "loss": 0.5316, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.6203937960620394, | |
| "grad_norm": 1.6922410726547241, | |
| "learning_rate": 8.763112138219435e-06, | |
| "loss": 0.5384, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.6225937740622594, | |
| "grad_norm": 2.3351800441741943, | |
| "learning_rate": 8.758710245405525e-06, | |
| "loss": 0.5462, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.6247937520624793, | |
| "grad_norm": 1.7946525812149048, | |
| "learning_rate": 8.754308352591615e-06, | |
| "loss": 0.5341, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.6269937300626994, | |
| "grad_norm": 1.6485981941223145, | |
| "learning_rate": 8.749906459777706e-06, | |
| "loss": 0.5229, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.6291937080629194, | |
| "grad_norm": 2.138338327407837, | |
| "learning_rate": 8.745504566963796e-06, | |
| "loss": 0.5489, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.6313936860631394, | |
| "grad_norm": 1.7668613195419312, | |
| "learning_rate": 8.741102674149884e-06, | |
| "loss": 0.5239, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.6335936640633594, | |
| "grad_norm": 2.0970587730407715, | |
| "learning_rate": 8.736700781335975e-06, | |
| "loss": 0.5313, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.6357936420635794, | |
| "grad_norm": 1.7800394296646118, | |
| "learning_rate": 8.732298888522065e-06, | |
| "loss": 0.5322, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.6379936200637993, | |
| "grad_norm": 1.7388654947280884, | |
| "learning_rate": 8.727896995708155e-06, | |
| "loss": 0.5291, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.6401935980640193, | |
| "grad_norm": 1.6228729486465454, | |
| "learning_rate": 8.723495102894246e-06, | |
| "loss": 0.5318, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.6423935760642393, | |
| "grad_norm": 2.1541671752929688, | |
| "learning_rate": 8.719093210080334e-06, | |
| "loss": 0.5376, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.6445935540644594, | |
| "grad_norm": 2.0600032806396484, | |
| "learning_rate": 8.714691317266424e-06, | |
| "loss": 0.5342, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.6467935320646794, | |
| "grad_norm": 1.673624873161316, | |
| "learning_rate": 8.710289424452515e-06, | |
| "loss": 0.5533, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.6489935100648994, | |
| "grad_norm": 1.8217624425888062, | |
| "learning_rate": 8.705887531638605e-06, | |
| "loss": 0.526, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.6511934880651193, | |
| "grad_norm": 2.1350643634796143, | |
| "learning_rate": 8.701485638824695e-06, | |
| "loss": 0.5254, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.6533934660653393, | |
| "grad_norm": 1.7675269842147827, | |
| "learning_rate": 8.697083746010786e-06, | |
| "loss": 0.5191, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.6555934440655593, | |
| "grad_norm": 2.134058952331543, | |
| "learning_rate": 8.692681853196874e-06, | |
| "loss": 0.5329, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.6577934220657793, | |
| "grad_norm": 1.6623740196228027, | |
| "learning_rate": 8.688279960382964e-06, | |
| "loss": 0.5287, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.6599934000659994, | |
| "grad_norm": 2.05334210395813, | |
| "learning_rate": 8.683878067569056e-06, | |
| "loss": 0.5393, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.6621933780662194, | |
| "grad_norm": 1.7684849500656128, | |
| "learning_rate": 8.679476174755147e-06, | |
| "loss": 0.527, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.6643933560664393, | |
| "grad_norm": 1.825725793838501, | |
| "learning_rate": 8.675074281941235e-06, | |
| "loss": 0.5314, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.6665933340666593, | |
| "grad_norm": 1.9619163274765015, | |
| "learning_rate": 8.670672389127326e-06, | |
| "loss": 0.5238, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.6687933120668793, | |
| "grad_norm": 1.7254787683486938, | |
| "learning_rate": 8.666270496313416e-06, | |
| "loss": 0.5253, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.6709932900670993, | |
| "grad_norm": 1.739046335220337, | |
| "learning_rate": 8.661868603499506e-06, | |
| "loss": 0.5452, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.6731932680673193, | |
| "grad_norm": 1.9458619356155396, | |
| "learning_rate": 8.657466710685596e-06, | |
| "loss": 0.5253, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.6753932460675394, | |
| "grad_norm": 1.9501069784164429, | |
| "learning_rate": 8.653064817871687e-06, | |
| "loss": 0.5313, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.6775932240677593, | |
| "grad_norm": 1.4754610061645508, | |
| "learning_rate": 8.648662925057775e-06, | |
| "loss": 0.5409, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.6797932020679793, | |
| "grad_norm": 1.7951412200927734, | |
| "learning_rate": 8.644261032243866e-06, | |
| "loss": 0.558, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.6819931800681993, | |
| "grad_norm": 1.5883880853652954, | |
| "learning_rate": 8.639859139429956e-06, | |
| "loss": 0.5668, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.6841931580684193, | |
| "grad_norm": 1.7715564966201782, | |
| "learning_rate": 8.635457246616046e-06, | |
| "loss": 0.5567, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.6863931360686393, | |
| "grad_norm": 1.7103959321975708, | |
| "learning_rate": 8.631055353802136e-06, | |
| "loss": 0.5646, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.6885931140688593, | |
| "grad_norm": 2.053924322128296, | |
| "learning_rate": 8.626653460988225e-06, | |
| "loss": 0.5554, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.6907930920690794, | |
| "grad_norm": 1.3964165449142456, | |
| "learning_rate": 8.622251568174315e-06, | |
| "loss": 0.5341, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.6929930700692993, | |
| "grad_norm": 1.623286485671997, | |
| "learning_rate": 8.617849675360406e-06, | |
| "loss": 0.5475, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.6951930480695193, | |
| "grad_norm": 1.5909929275512695, | |
| "learning_rate": 8.613447782546496e-06, | |
| "loss": 0.543, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.6973930260697393, | |
| "grad_norm": 1.6793596744537354, | |
| "learning_rate": 8.609045889732586e-06, | |
| "loss": 0.5642, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.6995930040699593, | |
| "grad_norm": 1.5003210306167603, | |
| "learning_rate": 8.604643996918676e-06, | |
| "loss": 0.5528, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.7017929820701793, | |
| "grad_norm": 1.6098058223724365, | |
| "learning_rate": 8.600242104104765e-06, | |
| "loss": 0.5591, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.7039929600703992, | |
| "grad_norm": 1.8180344104766846, | |
| "learning_rate": 8.595840211290855e-06, | |
| "loss": 0.5575, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.7061929380706193, | |
| "grad_norm": 1.6185832023620605, | |
| "learning_rate": 8.591438318476946e-06, | |
| "loss": 0.5555, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.7083929160708393, | |
| "grad_norm": 1.7686482667922974, | |
| "learning_rate": 8.587036425663036e-06, | |
| "loss": 0.5562, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.7105928940710593, | |
| "grad_norm": 1.6809719800949097, | |
| "learning_rate": 8.582634532849126e-06, | |
| "loss": 0.5519, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.7127928720712793, | |
| "grad_norm": 1.8532384634017944, | |
| "learning_rate": 8.578232640035216e-06, | |
| "loss": 0.5466, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.7149928500714993, | |
| "grad_norm": 1.6389007568359375, | |
| "learning_rate": 8.573830747221305e-06, | |
| "loss": 0.5527, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.7171928280717192, | |
| "grad_norm": 1.6388925313949585, | |
| "learning_rate": 8.569428854407395e-06, | |
| "loss": 0.5439, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.7193928060719392, | |
| "grad_norm": 1.7384296655654907, | |
| "learning_rate": 8.565026961593486e-06, | |
| "loss": 0.5375, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.7215927840721593, | |
| "grad_norm": 1.7327488660812378, | |
| "learning_rate": 8.560625068779576e-06, | |
| "loss": 0.5548, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.7237927620723793, | |
| "grad_norm": 1.564349889755249, | |
| "learning_rate": 8.556223175965666e-06, | |
| "loss": 0.5573, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.7259927400725993, | |
| "grad_norm": 1.8052953481674194, | |
| "learning_rate": 8.551821283151756e-06, | |
| "loss": 0.524, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.7281927180728193, | |
| "grad_norm": 1.5981229543685913, | |
| "learning_rate": 8.547419390337845e-06, | |
| "loss": 0.5449, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.7303926960730392, | |
| "grad_norm": 1.4789613485336304, | |
| "learning_rate": 8.543017497523935e-06, | |
| "loss": 0.5356, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.7325926740732592, | |
| "grad_norm": 1.8192943334579468, | |
| "learning_rate": 8.538615604710026e-06, | |
| "loss": 0.5691, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.7347926520734792, | |
| "grad_norm": 1.874607801437378, | |
| "learning_rate": 8.534213711896116e-06, | |
| "loss": 0.5539, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.7369926300736993, | |
| "grad_norm": 1.6394860744476318, | |
| "learning_rate": 8.529811819082206e-06, | |
| "loss": 0.5653, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.7391926080739193, | |
| "grad_norm": 1.9063067436218262, | |
| "learning_rate": 8.525409926268296e-06, | |
| "loss": 0.5515, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.7413925860741393, | |
| "grad_norm": 1.6854544878005981, | |
| "learning_rate": 8.521008033454387e-06, | |
| "loss": 0.5534, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.7435925640743593, | |
| "grad_norm": 1.7821418046951294, | |
| "learning_rate": 8.516606140640477e-06, | |
| "loss": 0.5521, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.7457925420745792, | |
| "grad_norm": 1.5063166618347168, | |
| "learning_rate": 8.512204247826567e-06, | |
| "loss": 0.5667, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.7479925200747992, | |
| "grad_norm": 1.9604572057724, | |
| "learning_rate": 8.507802355012656e-06, | |
| "loss": 0.5434, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.7501924980750192, | |
| "grad_norm": 1.8538181781768799, | |
| "learning_rate": 8.503400462198746e-06, | |
| "loss": 0.5366, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.7523924760752393, | |
| "grad_norm": 1.8284313678741455, | |
| "learning_rate": 8.498998569384836e-06, | |
| "loss": 0.5549, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.7545924540754593, | |
| "grad_norm": 1.5392765998840332, | |
| "learning_rate": 8.494596676570927e-06, | |
| "loss": 0.5459, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.7567924320756793, | |
| "grad_norm": 1.601608157157898, | |
| "learning_rate": 8.490194783757017e-06, | |
| "loss": 0.5478, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.7589924100758992, | |
| "grad_norm": 1.602129340171814, | |
| "learning_rate": 8.485792890943107e-06, | |
| "loss": 0.5264, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.7611923880761192, | |
| "grad_norm": 1.5455442667007446, | |
| "learning_rate": 8.481390998129196e-06, | |
| "loss": 0.5452, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.7633923660763392, | |
| "grad_norm": 1.7308459281921387, | |
| "learning_rate": 8.476989105315286e-06, | |
| "loss": 0.5346, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.7655923440765592, | |
| "grad_norm": 1.9421132802963257, | |
| "learning_rate": 8.472587212501376e-06, | |
| "loss": 0.5502, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.7677923220767793, | |
| "grad_norm": 1.6126275062561035, | |
| "learning_rate": 8.468185319687467e-06, | |
| "loss": 0.5531, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.7699923000769993, | |
| "grad_norm": 1.9307098388671875, | |
| "learning_rate": 8.463783426873557e-06, | |
| "loss": 0.5451, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.7721922780772192, | |
| "grad_norm": 1.785501480102539, | |
| "learning_rate": 8.459381534059646e-06, | |
| "loss": 0.5657, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.7743922560774392, | |
| "grad_norm": 1.3118321895599365, | |
| "learning_rate": 8.454979641245736e-06, | |
| "loss": 0.5425, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.7765922340776592, | |
| "grad_norm": 1.6785212755203247, | |
| "learning_rate": 8.450577748431826e-06, | |
| "loss": 0.5608, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.7787922120778792, | |
| "grad_norm": 1.687156081199646, | |
| "learning_rate": 8.446175855617916e-06, | |
| "loss": 0.5268, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.7809921900780992, | |
| "grad_norm": 1.6766939163208008, | |
| "learning_rate": 8.441773962804007e-06, | |
| "loss": 0.5505, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.7831921680783193, | |
| "grad_norm": 1.3873755931854248, | |
| "learning_rate": 8.437372069990097e-06, | |
| "loss": 0.5346, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.7853921460785392, | |
| "grad_norm": 1.4507646560668945, | |
| "learning_rate": 8.432970177176186e-06, | |
| "loss": 0.5456, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.7875921240787592, | |
| "grad_norm": 1.7354850769042969, | |
| "learning_rate": 8.428568284362276e-06, | |
| "loss": 0.5502, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.7897921020789792, | |
| "grad_norm": 1.4922300577163696, | |
| "learning_rate": 8.424166391548366e-06, | |
| "loss": 0.5628, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.7919920800791992, | |
| "grad_norm": 1.722380518913269, | |
| "learning_rate": 8.419764498734456e-06, | |
| "loss": 0.5556, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.7941920580794192, | |
| "grad_norm": 1.905194640159607, | |
| "learning_rate": 8.415362605920547e-06, | |
| "loss": 0.5529, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.7963920360796392, | |
| "grad_norm": 2.140815496444702, | |
| "learning_rate": 8.410960713106637e-06, | |
| "loss": 0.5567, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.7985920140798592, | |
| "grad_norm": 1.5261491537094116, | |
| "learning_rate": 8.406558820292726e-06, | |
| "loss": 0.554, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.8007919920800792, | |
| "grad_norm": 1.6273101568222046, | |
| "learning_rate": 8.402156927478816e-06, | |
| "loss": 0.5534, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.8029919700802992, | |
| "grad_norm": 1.7818236351013184, | |
| "learning_rate": 8.397755034664906e-06, | |
| "loss": 0.5408, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.8051919480805192, | |
| "grad_norm": 1.9317457675933838, | |
| "learning_rate": 8.393353141850996e-06, | |
| "loss": 0.5726, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.8073919260807392, | |
| "grad_norm": 1.813769817352295, | |
| "learning_rate": 8.388951249037087e-06, | |
| "loss": 0.5605, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.8095919040809592, | |
| "grad_norm": 1.9883424043655396, | |
| "learning_rate": 8.384549356223177e-06, | |
| "loss": 0.5489, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.8117918820811791, | |
| "grad_norm": 1.709024429321289, | |
| "learning_rate": 8.380147463409267e-06, | |
| "loss": 0.5411, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.8139918600813992, | |
| "grad_norm": 1.4431244134902954, | |
| "learning_rate": 8.375745570595357e-06, | |
| "loss": 0.5472, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.8161918380816192, | |
| "grad_norm": 1.5251537561416626, | |
| "learning_rate": 8.371343677781448e-06, | |
| "loss": 0.5479, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.8183918160818392, | |
| "grad_norm": 1.687023401260376, | |
| "learning_rate": 8.366941784967536e-06, | |
| "loss": 0.543, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.8205917940820592, | |
| "grad_norm": 1.5462446212768555, | |
| "learning_rate": 8.362539892153627e-06, | |
| "loss": 0.55, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.8227917720822792, | |
| "grad_norm": 1.984750747680664, | |
| "learning_rate": 8.358137999339717e-06, | |
| "loss": 0.5495, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.8249917500824991, | |
| "grad_norm": 1.6375317573547363, | |
| "learning_rate": 8.353736106525807e-06, | |
| "loss": 0.5479, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.8271917280827191, | |
| "grad_norm": 1.8285633325576782, | |
| "learning_rate": 8.349334213711897e-06, | |
| "loss": 0.5398, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.8293917060829392, | |
| "grad_norm": 1.7603964805603027, | |
| "learning_rate": 8.344932320897988e-06, | |
| "loss": 0.5343, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.8315916840831592, | |
| "grad_norm": 1.4836808443069458, | |
| "learning_rate": 8.340530428084076e-06, | |
| "loss": 0.5559, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.8337916620833792, | |
| "grad_norm": 1.4867973327636719, | |
| "learning_rate": 8.336128535270167e-06, | |
| "loss": 0.5433, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.8359916400835992, | |
| "grad_norm": 1.784264326095581, | |
| "learning_rate": 8.331726642456257e-06, | |
| "loss": 0.5451, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.8381916180838191, | |
| "grad_norm": 1.3747423887252808, | |
| "learning_rate": 8.327324749642347e-06, | |
| "loss": 0.538, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.8403915960840391, | |
| "grad_norm": 1.8073352575302124, | |
| "learning_rate": 8.322922856828437e-06, | |
| "loss": 0.545, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.8425915740842591, | |
| "grad_norm": 1.6162651777267456, | |
| "learning_rate": 8.318520964014528e-06, | |
| "loss": 0.5448, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.8447915520844792, | |
| "grad_norm": 1.6627821922302246, | |
| "learning_rate": 8.314119071200616e-06, | |
| "loss": 0.5504, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.8469915300846992, | |
| "grad_norm": 1.594759464263916, | |
| "learning_rate": 8.309717178386707e-06, | |
| "loss": 0.5344, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.8491915080849192, | |
| "grad_norm": 1.7449952363967896, | |
| "learning_rate": 8.305315285572797e-06, | |
| "loss": 0.5558, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.8513914860851391, | |
| "grad_norm": 1.6787577867507935, | |
| "learning_rate": 8.300913392758887e-06, | |
| "loss": 0.5282, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.8535914640853591, | |
| "grad_norm": 2.2145471572875977, | |
| "learning_rate": 8.296511499944977e-06, | |
| "loss": 0.5371, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.8557914420855791, | |
| "grad_norm": 1.7959023714065552, | |
| "learning_rate": 8.292109607131068e-06, | |
| "loss": 0.5467, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.8579914200857991, | |
| "grad_norm": 1.7362741231918335, | |
| "learning_rate": 8.287707714317156e-06, | |
| "loss": 0.5334, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.8601913980860192, | |
| "grad_norm": 1.471660852432251, | |
| "learning_rate": 8.283305821503247e-06, | |
| "loss": 0.5563, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.8623913760862392, | |
| "grad_norm": 1.9247560501098633, | |
| "learning_rate": 8.278903928689337e-06, | |
| "loss": 0.5422, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.8645913540864592, | |
| "grad_norm": 1.4459770917892456, | |
| "learning_rate": 8.274502035875427e-06, | |
| "loss": 0.5549, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.8667913320866791, | |
| "grad_norm": 1.8843663930892944, | |
| "learning_rate": 8.270100143061517e-06, | |
| "loss": 0.5463, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.8689913100868991, | |
| "grad_norm": 1.6664437055587769, | |
| "learning_rate": 8.265698250247606e-06, | |
| "loss": 0.557, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.8711912880871191, | |
| "grad_norm": 1.8281344175338745, | |
| "learning_rate": 8.261296357433696e-06, | |
| "loss": 0.5306, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.8733912660873391, | |
| "grad_norm": 1.9608473777770996, | |
| "learning_rate": 8.256894464619787e-06, | |
| "loss": 0.5458, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.8755912440875592, | |
| "grad_norm": 1.9003684520721436, | |
| "learning_rate": 8.252492571805877e-06, | |
| "loss": 0.55, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.8777912220877792, | |
| "grad_norm": 1.8628289699554443, | |
| "learning_rate": 8.248090678991967e-06, | |
| "loss": 0.5379, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.8799912000879991, | |
| "grad_norm": 1.5854053497314453, | |
| "learning_rate": 8.243688786178057e-06, | |
| "loss": 0.5352, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.8821911780882191, | |
| "grad_norm": 1.957435965538025, | |
| "learning_rate": 8.239286893364146e-06, | |
| "loss": 0.5358, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.8843911560884391, | |
| "grad_norm": 1.838132381439209, | |
| "learning_rate": 8.234885000550236e-06, | |
| "loss": 0.5423, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.8865911340886591, | |
| "grad_norm": 1.936266541481018, | |
| "learning_rate": 8.230483107736327e-06, | |
| "loss": 0.5335, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.8887911120888791, | |
| "grad_norm": 1.5629870891571045, | |
| "learning_rate": 8.226081214922419e-06, | |
| "loss": 0.5354, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.8909910900890992, | |
| "grad_norm": 1.7080520391464233, | |
| "learning_rate": 8.221679322108507e-06, | |
| "loss": 0.5532, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.8931910680893191, | |
| "grad_norm": 1.795921802520752, | |
| "learning_rate": 8.217277429294597e-06, | |
| "loss": 0.5528, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.8953910460895391, | |
| "grad_norm": 1.955198884010315, | |
| "learning_rate": 8.212875536480688e-06, | |
| "loss": 0.5598, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.8975910240897591, | |
| "grad_norm": 1.865143895149231, | |
| "learning_rate": 8.208473643666778e-06, | |
| "loss": 0.5371, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.8997910020899791, | |
| "grad_norm": 1.8305407762527466, | |
| "learning_rate": 8.204071750852868e-06, | |
| "loss": 0.5459, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.9019909800901991, | |
| "grad_norm": 2.158996820449829, | |
| "learning_rate": 8.199669858038959e-06, | |
| "loss": 0.5477, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.904190958090419, | |
| "grad_norm": 1.5184693336486816, | |
| "learning_rate": 8.195267965225047e-06, | |
| "loss": 0.5536, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.9063909360906391, | |
| "grad_norm": 1.2580761909484863, | |
| "learning_rate": 8.190866072411137e-06, | |
| "loss": 0.5444, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.9085909140908591, | |
| "grad_norm": 1.5662882328033447, | |
| "learning_rate": 8.186464179597228e-06, | |
| "loss": 0.5474, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.9107908920910791, | |
| "grad_norm": 1.775161623954773, | |
| "learning_rate": 8.182062286783318e-06, | |
| "loss": 0.5405, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.9129908700912991, | |
| "grad_norm": 1.604435920715332, | |
| "learning_rate": 8.177660393969408e-06, | |
| "loss": 0.5425, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.9151908480915191, | |
| "grad_norm": 1.9549158811569214, | |
| "learning_rate": 8.173258501155497e-06, | |
| "loss": 0.5398, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.917390826091739, | |
| "grad_norm": 1.4547535181045532, | |
| "learning_rate": 8.168856608341587e-06, | |
| "loss": 0.5511, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.919590804091959, | |
| "grad_norm": 1.8771201372146606, | |
| "learning_rate": 8.164454715527677e-06, | |
| "loss": 0.5481, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.9217907820921791, | |
| "grad_norm": 2.0473129749298096, | |
| "learning_rate": 8.160052822713768e-06, | |
| "loss": 0.5418, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.9239907600923991, | |
| "grad_norm": 1.8082759380340576, | |
| "learning_rate": 8.155650929899858e-06, | |
| "loss": 0.5346, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.9261907380926191, | |
| "grad_norm": 1.8849467039108276, | |
| "learning_rate": 8.151249037085948e-06, | |
| "loss": 0.5563, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.9283907160928391, | |
| "grad_norm": 1.6767569780349731, | |
| "learning_rate": 8.146847144272037e-06, | |
| "loss": 0.536, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.9305906940930591, | |
| "grad_norm": 1.9930092096328735, | |
| "learning_rate": 8.142445251458127e-06, | |
| "loss": 0.5507, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.932790672093279, | |
| "grad_norm": 1.9420870542526245, | |
| "learning_rate": 8.138043358644217e-06, | |
| "loss": 0.5405, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.934990650093499, | |
| "grad_norm": 1.6965640783309937, | |
| "learning_rate": 8.133641465830308e-06, | |
| "loss": 0.5469, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.9371906280937191, | |
| "grad_norm": 1.4808323383331299, | |
| "learning_rate": 8.129239573016398e-06, | |
| "loss": 0.5341, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.9393906060939391, | |
| "grad_norm": 1.516119122505188, | |
| "learning_rate": 8.124837680202488e-06, | |
| "loss": 0.5515, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.9415905840941591, | |
| "grad_norm": 1.6243934631347656, | |
| "learning_rate": 8.120435787388577e-06, | |
| "loss": 0.541, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.9437905620943791, | |
| "grad_norm": 1.6918444633483887, | |
| "learning_rate": 8.116033894574667e-06, | |
| "loss": 0.5302, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.945990540094599, | |
| "grad_norm": 1.6359889507293701, | |
| "learning_rate": 8.111632001760757e-06, | |
| "loss": 0.5295, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.948190518094819, | |
| "grad_norm": 1.7587625980377197, | |
| "learning_rate": 8.107230108946848e-06, | |
| "loss": 0.5415, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.950390496095039, | |
| "grad_norm": 1.8017805814743042, | |
| "learning_rate": 8.102828216132938e-06, | |
| "loss": 0.5422, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.9525904740952591, | |
| "grad_norm": 1.970982313156128, | |
| "learning_rate": 8.098426323319027e-06, | |
| "loss": 0.5296, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.9547904520954791, | |
| "grad_norm": 1.8112688064575195, | |
| "learning_rate": 8.094024430505117e-06, | |
| "loss": 0.5539, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.9569904300956991, | |
| "grad_norm": 1.7808321714401245, | |
| "learning_rate": 8.089622537691207e-06, | |
| "loss": 0.5498, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.959190408095919, | |
| "grad_norm": 1.9657952785491943, | |
| "learning_rate": 8.085220644877297e-06, | |
| "loss": 0.5424, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.961390386096139, | |
| "grad_norm": 1.8520526885986328, | |
| "learning_rate": 8.080818752063388e-06, | |
| "loss": 0.5392, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.963590364096359, | |
| "grad_norm": 1.7919948101043701, | |
| "learning_rate": 8.076416859249478e-06, | |
| "loss": 0.532, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.965790342096579, | |
| "grad_norm": 1.600967288017273, | |
| "learning_rate": 8.072014966435568e-06, | |
| "loss": 0.5406, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.9679903200967991, | |
| "grad_norm": 1.638075351715088, | |
| "learning_rate": 8.067613073621659e-06, | |
| "loss": 0.553, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.9701902980970191, | |
| "grad_norm": 1.5249767303466797, | |
| "learning_rate": 8.063211180807749e-06, | |
| "loss": 0.5533, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.972390276097239, | |
| "grad_norm": 1.6304973363876343, | |
| "learning_rate": 8.05880928799384e-06, | |
| "loss": 0.5377, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.974590254097459, | |
| "grad_norm": 1.8152045011520386, | |
| "learning_rate": 8.054407395179928e-06, | |
| "loss": 0.5284, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.976790232097679, | |
| "grad_norm": 1.652199625968933, | |
| "learning_rate": 8.050005502366018e-06, | |
| "loss": 0.5448, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.978990210097899, | |
| "grad_norm": 1.7338589429855347, | |
| "learning_rate": 8.045603609552108e-06, | |
| "loss": 0.5395, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.981190188098119, | |
| "grad_norm": 1.5801849365234375, | |
| "learning_rate": 8.041201716738199e-06, | |
| "loss": 0.5297, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.9833901660983391, | |
| "grad_norm": 2.031813621520996, | |
| "learning_rate": 8.036799823924289e-06, | |
| "loss": 0.5617, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.985590144098559, | |
| "grad_norm": 1.934370756149292, | |
| "learning_rate": 8.03239793111038e-06, | |
| "loss": 0.5329, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.987790122098779, | |
| "grad_norm": 1.849741816520691, | |
| "learning_rate": 8.027996038296468e-06, | |
| "loss": 0.5413, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.989990100098999, | |
| "grad_norm": 1.757784366607666, | |
| "learning_rate": 8.023594145482558e-06, | |
| "loss": 0.5319, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.992190078099219, | |
| "grad_norm": 1.6084299087524414, | |
| "learning_rate": 8.019192252668648e-06, | |
| "loss": 0.5465, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.994390056099439, | |
| "grad_norm": 1.9279767274856567, | |
| "learning_rate": 8.014790359854739e-06, | |
| "loss": 0.5425, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.996590034099659, | |
| "grad_norm": 1.5739712715148926, | |
| "learning_rate": 8.010388467040829e-06, | |
| "loss": 0.5471, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.998790012099879, | |
| "grad_norm": 1.5087926387786865, | |
| "learning_rate": 8.005986574226917e-06, | |
| "loss": 0.5417, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.000989990100099, | |
| "grad_norm": 2.411069393157959, | |
| "learning_rate": 8.001584681413008e-06, | |
| "loss": 0.5328, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.003189968100319, | |
| "grad_norm": 2.560279607772827, | |
| "learning_rate": 7.997182788599098e-06, | |
| "loss": 0.5018, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.005389946100539, | |
| "grad_norm": 1.8764352798461914, | |
| "learning_rate": 7.992780895785188e-06, | |
| "loss": 0.4947, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.007589924100759, | |
| "grad_norm": 2.0531773567199707, | |
| "learning_rate": 7.988379002971279e-06, | |
| "loss": 0.5016, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.009789902100979, | |
| "grad_norm": 2.1719043254852295, | |
| "learning_rate": 7.983977110157369e-06, | |
| "loss": 0.504, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.011989880101199, | |
| "grad_norm": 1.8235334157943726, | |
| "learning_rate": 7.979575217343457e-06, | |
| "loss": 0.4967, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.014189858101419, | |
| "grad_norm": 2.329827308654785, | |
| "learning_rate": 7.975173324529548e-06, | |
| "loss": 0.5121, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.016389836101639, | |
| "grad_norm": 2.2712931632995605, | |
| "learning_rate": 7.970771431715638e-06, | |
| "loss": 0.4901, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.018589814101859, | |
| "grad_norm": 1.9942501783370972, | |
| "learning_rate": 7.966369538901728e-06, | |
| "loss": 0.5052, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.020789792102079, | |
| "grad_norm": 2.014451742172241, | |
| "learning_rate": 7.961967646087819e-06, | |
| "loss": 0.5117, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.022989770102299, | |
| "grad_norm": 2.1809909343719482, | |
| "learning_rate": 7.957565753273909e-06, | |
| "loss": 0.5106, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.025189748102519, | |
| "grad_norm": 1.6118221282958984, | |
| "learning_rate": 7.953163860459997e-06, | |
| "loss": 0.4959, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.027389726102739, | |
| "grad_norm": 1.9853328466415405, | |
| "learning_rate": 7.948761967646088e-06, | |
| "loss": 0.5127, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.029589704102959, | |
| "grad_norm": 2.3931078910827637, | |
| "learning_rate": 7.944360074832178e-06, | |
| "loss": 0.5084, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.031789682103179, | |
| "grad_norm": 1.6679604053497314, | |
| "learning_rate": 7.939958182018268e-06, | |
| "loss": 0.4913, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.0339896601033989, | |
| "grad_norm": 2.377412796020508, | |
| "learning_rate": 7.935556289204359e-06, | |
| "loss": 0.4915, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.0361896381036189, | |
| "grad_norm": 2.0759618282318115, | |
| "learning_rate": 7.931154396390449e-06, | |
| "loss": 0.5011, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.038389616103839, | |
| "grad_norm": 2.061979055404663, | |
| "learning_rate": 7.926752503576537e-06, | |
| "loss": 0.4945, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.040589594104059, | |
| "grad_norm": 1.912423849105835, | |
| "learning_rate": 7.92235061076263e-06, | |
| "loss": 0.496, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.042789572104279, | |
| "grad_norm": 2.3455774784088135, | |
| "learning_rate": 7.91794871794872e-06, | |
| "loss": 0.5063, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.044989550104499, | |
| "grad_norm": 1.7976536750793457, | |
| "learning_rate": 7.913546825134808e-06, | |
| "loss": 0.5053, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.047189528104719, | |
| "grad_norm": 2.056267023086548, | |
| "learning_rate": 7.909144932320899e-06, | |
| "loss": 0.4939, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.049389506104939, | |
| "grad_norm": 2.216721534729004, | |
| "learning_rate": 7.904743039506989e-06, | |
| "loss": 0.5007, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.051589484105159, | |
| "grad_norm": 1.4782536029815674, | |
| "learning_rate": 7.90034114669308e-06, | |
| "loss": 0.4765, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.053789462105379, | |
| "grad_norm": 1.739716649055481, | |
| "learning_rate": 7.89593925387917e-06, | |
| "loss": 0.5245, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.055989440105599, | |
| "grad_norm": 1.5695744752883911, | |
| "learning_rate": 7.89153736106526e-06, | |
| "loss": 0.511, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.058189418105819, | |
| "grad_norm": 2.0835139751434326, | |
| "learning_rate": 7.887135468251348e-06, | |
| "loss": 0.4989, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.0603893961060389, | |
| "grad_norm": 1.9040948152542114, | |
| "learning_rate": 7.882733575437439e-06, | |
| "loss": 0.5001, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.0625893741062589, | |
| "grad_norm": 2.1570136547088623, | |
| "learning_rate": 7.878331682623529e-06, | |
| "loss": 0.5031, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.0647893521064788, | |
| "grad_norm": 1.8248552083969116, | |
| "learning_rate": 7.873929789809619e-06, | |
| "loss": 0.504, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.0669893301066988, | |
| "grad_norm": 1.8128606081008911, | |
| "learning_rate": 7.86952789699571e-06, | |
| "loss": 0.4825, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.069189308106919, | |
| "grad_norm": 2.15380597114563, | |
| "learning_rate": 7.8651260041818e-06, | |
| "loss": 0.4843, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.071389286107139, | |
| "grad_norm": 2.4410858154296875, | |
| "learning_rate": 7.860724111367888e-06, | |
| "loss": 0.4973, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.073589264107359, | |
| "grad_norm": 1.9602640867233276, | |
| "learning_rate": 7.856322218553979e-06, | |
| "loss": 0.5039, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.075789242107579, | |
| "grad_norm": 2.189321994781494, | |
| "learning_rate": 7.851920325740069e-06, | |
| "loss": 0.5002, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.077989220107799, | |
| "grad_norm": 2.153059244155884, | |
| "learning_rate": 7.847518432926159e-06, | |
| "loss": 0.5074, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.080189198108019, | |
| "grad_norm": 1.9804766178131104, | |
| "learning_rate": 7.84311654011225e-06, | |
| "loss": 0.4981, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.082389176108239, | |
| "grad_norm": 2.228227376937866, | |
| "learning_rate": 7.838714647298338e-06, | |
| "loss": 0.5115, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.084589154108459, | |
| "grad_norm": 2.639230489730835, | |
| "learning_rate": 7.834312754484428e-06, | |
| "loss": 0.4956, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.086789132108679, | |
| "grad_norm": 2.2388269901275635, | |
| "learning_rate": 7.829910861670519e-06, | |
| "loss": 0.4957, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.0889891101088989, | |
| "grad_norm": 2.2344448566436768, | |
| "learning_rate": 7.825508968856609e-06, | |
| "loss": 0.5191, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.0911890881091189, | |
| "grad_norm": 2.1383955478668213, | |
| "learning_rate": 7.821107076042699e-06, | |
| "loss": 0.5035, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.0933890661093388, | |
| "grad_norm": 2.0469112396240234, | |
| "learning_rate": 7.81670518322879e-06, | |
| "loss": 0.4991, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.0955890441095588, | |
| "grad_norm": 2.091733694076538, | |
| "learning_rate": 7.812303290414878e-06, | |
| "loss": 0.5213, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.0977890221097788, | |
| "grad_norm": 2.2485196590423584, | |
| "learning_rate": 7.807901397600968e-06, | |
| "loss": 0.5159, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.099989000109999, | |
| "grad_norm": 2.335508108139038, | |
| "learning_rate": 7.803499504787059e-06, | |
| "loss": 0.5035, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.099989000109999, | |
| "eval_loss": 0.579010546207428, | |
| "eval_runtime": 378.8096, | |
| "eval_samples_per_second": 158.391, | |
| "eval_steps_per_second": 4.95, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.102188978110219, | |
| "grad_norm": 2.1119778156280518, | |
| "learning_rate": 7.799097611973149e-06, | |
| "loss": 0.5081, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.104388956110439, | |
| "grad_norm": 2.182777166366577, | |
| "learning_rate": 7.794695719159239e-06, | |
| "loss": 0.4925, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.106588934110659, | |
| "grad_norm": 2.2675302028656006, | |
| "learning_rate": 7.79029382634533e-06, | |
| "loss": 0.4865, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.108788912110879, | |
| "grad_norm": 1.858472228050232, | |
| "learning_rate": 7.785891933531418e-06, | |
| "loss": 0.5118, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.110988890111099, | |
| "grad_norm": 1.8882789611816406, | |
| "learning_rate": 7.781490040717508e-06, | |
| "loss": 0.5087, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.113188868111319, | |
| "grad_norm": 1.9170640707015991, | |
| "learning_rate": 7.777088147903599e-06, | |
| "loss": 0.491, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.1153888461115389, | |
| "grad_norm": 1.9825174808502197, | |
| "learning_rate": 7.772686255089689e-06, | |
| "loss": 0.5072, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.1175888241117589, | |
| "grad_norm": 2.3916232585906982, | |
| "learning_rate": 7.768284362275779e-06, | |
| "loss": 0.5111, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.1197888021119788, | |
| "grad_norm": 2.069160223007202, | |
| "learning_rate": 7.76388246946187e-06, | |
| "loss": 0.4927, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.1219887801121988, | |
| "grad_norm": 1.780382752418518, | |
| "learning_rate": 7.75948057664796e-06, | |
| "loss": 0.4959, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.1241887581124188, | |
| "grad_norm": 2.5268094539642334, | |
| "learning_rate": 7.75507868383405e-06, | |
| "loss": 0.4975, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.1263887361126388, | |
| "grad_norm": 1.9989362955093384, | |
| "learning_rate": 7.75067679102014e-06, | |
| "loss": 0.504, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.1285887141128588, | |
| "grad_norm": 2.230954647064209, | |
| "learning_rate": 7.746274898206229e-06, | |
| "loss": 0.5172, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.1307886921130788, | |
| "grad_norm": 2.2332351207733154, | |
| "learning_rate": 7.741873005392319e-06, | |
| "loss": 0.5026, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.132988670113299, | |
| "grad_norm": 2.234415054321289, | |
| "learning_rate": 7.73747111257841e-06, | |
| "loss": 0.5169, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.135188648113519, | |
| "grad_norm": 1.9074784517288208, | |
| "learning_rate": 7.7330692197645e-06, | |
| "loss": 0.4878, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.137388626113739, | |
| "grad_norm": 1.9809048175811768, | |
| "learning_rate": 7.72866732695059e-06, | |
| "loss": 0.4794, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.139588604113959, | |
| "grad_norm": 1.90762460231781, | |
| "learning_rate": 7.72426543413668e-06, | |
| "loss": 0.4996, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.1417885821141789, | |
| "grad_norm": 2.3830220699310303, | |
| "learning_rate": 7.719863541322769e-06, | |
| "loss": 0.5028, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.1439885601143989, | |
| "grad_norm": 2.052335023880005, | |
| "learning_rate": 7.715461648508859e-06, | |
| "loss": 0.5189, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.1461885381146188, | |
| "grad_norm": 2.3055222034454346, | |
| "learning_rate": 7.71105975569495e-06, | |
| "loss": 0.5117, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.1483885161148388, | |
| "grad_norm": 2.7478485107421875, | |
| "learning_rate": 7.70665786288104e-06, | |
| "loss": 0.503, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.1505884941150588, | |
| "grad_norm": 1.8724684715270996, | |
| "learning_rate": 7.70225597006713e-06, | |
| "loss": 0.5017, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.1527884721152788, | |
| "grad_norm": 2.1905338764190674, | |
| "learning_rate": 7.69785407725322e-06, | |
| "loss": 0.4995, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.1549884501154988, | |
| "grad_norm": 2.169680118560791, | |
| "learning_rate": 7.693452184439309e-06, | |
| "loss": 0.5012, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.1571884281157188, | |
| "grad_norm": 2.3531687259674072, | |
| "learning_rate": 7.689050291625399e-06, | |
| "loss": 0.4835, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.159388406115939, | |
| "grad_norm": 1.9876978397369385, | |
| "learning_rate": 7.68464839881149e-06, | |
| "loss": 0.4949, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.161588384116159, | |
| "grad_norm": 2.463718891143799, | |
| "learning_rate": 7.68024650599758e-06, | |
| "loss": 0.5121, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.163788362116379, | |
| "grad_norm": 2.4976985454559326, | |
| "learning_rate": 7.67584461318367e-06, | |
| "loss": 0.4882, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.165988340116599, | |
| "grad_norm": 1.968513011932373, | |
| "learning_rate": 7.67144272036976e-06, | |
| "loss": 0.5052, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.168188318116819, | |
| "grad_norm": 1.998396396636963, | |
| "learning_rate": 7.667040827555849e-06, | |
| "loss": 0.4912, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.1703882961170389, | |
| "grad_norm": 2.0211946964263916, | |
| "learning_rate": 7.662638934741939e-06, | |
| "loss": 0.5087, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.1725882741172589, | |
| "grad_norm": 1.97858464717865, | |
| "learning_rate": 7.65823704192803e-06, | |
| "loss": 0.5015, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.1747882521174788, | |
| "grad_norm": 2.1665027141571045, | |
| "learning_rate": 7.65383514911412e-06, | |
| "loss": 0.5088, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.1769882301176988, | |
| "grad_norm": 2.3747305870056152, | |
| "learning_rate": 7.64943325630021e-06, | |
| "loss": 0.4971, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.1791882081179188, | |
| "grad_norm": 2.0653445720672607, | |
| "learning_rate": 7.645031363486299e-06, | |
| "loss": 0.4999, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.1813881861181388, | |
| "grad_norm": 2.0202314853668213, | |
| "learning_rate": 7.640629470672389e-06, | |
| "loss": 0.4857, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.1835881641183588, | |
| "grad_norm": 2.1644513607025146, | |
| "learning_rate": 7.636227577858479e-06, | |
| "loss": 0.4925, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.1857881421185787, | |
| "grad_norm": 2.2284882068634033, | |
| "learning_rate": 7.63182568504457e-06, | |
| "loss": 0.5076, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.1879881201187987, | |
| "grad_norm": 1.9216992855072021, | |
| "learning_rate": 7.62742379223066e-06, | |
| "loss": 0.4937, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.1901880981190187, | |
| "grad_norm": 2.151033401489258, | |
| "learning_rate": 7.623021899416749e-06, | |
| "loss": 0.5042, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.1923880761192387, | |
| "grad_norm": 2.544735908508301, | |
| "learning_rate": 7.618620006602839e-06, | |
| "loss": 0.5016, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.194588054119459, | |
| "grad_norm": 2.404811382293701, | |
| "learning_rate": 7.6142181137889306e-06, | |
| "loss": 0.4859, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.1967880321196789, | |
| "grad_norm": 2.071399450302124, | |
| "learning_rate": 7.60981622097502e-06, | |
| "loss": 0.5009, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.1989880101198989, | |
| "grad_norm": 2.0729258060455322, | |
| "learning_rate": 7.60541432816111e-06, | |
| "loss": 0.5068, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.2011879881201188, | |
| "grad_norm": 1.9438556432724, | |
| "learning_rate": 7.6010124353472006e-06, | |
| "loss": 0.5151, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.2033879661203388, | |
| "grad_norm": 2.3928163051605225, | |
| "learning_rate": 7.59661054253329e-06, | |
| "loss": 0.5152, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.2055879441205588, | |
| "grad_norm": 2.0218889713287354, | |
| "learning_rate": 7.59220864971938e-06, | |
| "loss": 0.4935, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.2077879221207788, | |
| "grad_norm": 2.0265040397644043, | |
| "learning_rate": 7.5878067569054706e-06, | |
| "loss": 0.4995, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.2099879001209988, | |
| "grad_norm": 2.6148312091827393, | |
| "learning_rate": 7.58340486409156e-06, | |
| "loss": 0.5082, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.2121878781212188, | |
| "grad_norm": 2.4383389949798584, | |
| "learning_rate": 7.57900297127765e-06, | |
| "loss": 0.4982, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.2143878561214387, | |
| "grad_norm": 2.649778366088867, | |
| "learning_rate": 7.5746010784637406e-06, | |
| "loss": 0.4974, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.2165878341216587, | |
| "grad_norm": 2.525026559829712, | |
| "learning_rate": 7.57019918564983e-06, | |
| "loss": 0.4953, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.2187878121218787, | |
| "grad_norm": 2.795290470123291, | |
| "learning_rate": 7.56579729283592e-06, | |
| "loss": 0.5118, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.220987790122099, | |
| "grad_norm": 1.8484504222869873, | |
| "learning_rate": 7.5613954000220105e-06, | |
| "loss": 0.4897, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.2231877681223189, | |
| "grad_norm": 2.673802614212036, | |
| "learning_rate": 7.5569935072081e-06, | |
| "loss": 0.4856, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.2253877461225389, | |
| "grad_norm": 2.250032663345337, | |
| "learning_rate": 7.55259161439419e-06, | |
| "loss": 0.4942, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.2275877241227588, | |
| "grad_norm": 2.281285285949707, | |
| "learning_rate": 7.5481897215802805e-06, | |
| "loss": 0.492, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.2297877021229788, | |
| "grad_norm": 2.1768269538879395, | |
| "learning_rate": 7.54378782876637e-06, | |
| "loss": 0.5014, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 1.2319876801231988, | |
| "grad_norm": 2.172852039337158, | |
| "learning_rate": 7.53938593595246e-06, | |
| "loss": 0.5055, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.2341876581234188, | |
| "grad_norm": 2.2055068016052246, | |
| "learning_rate": 7.5349840431385505e-06, | |
| "loss": 0.4994, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 1.2363876361236388, | |
| "grad_norm": 2.2056238651275635, | |
| "learning_rate": 7.53058215032464e-06, | |
| "loss": 0.5082, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 1.2385876141238588, | |
| "grad_norm": 1.8684000968933105, | |
| "learning_rate": 7.52618025751073e-06, | |
| "loss": 0.5001, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 1.2407875921240787, | |
| "grad_norm": 1.8799563646316528, | |
| "learning_rate": 7.52177836469682e-06, | |
| "loss": 0.4863, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 1.2429875701242987, | |
| "grad_norm": 2.0053553581237793, | |
| "learning_rate": 7.51737647188291e-06, | |
| "loss": 0.5019, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 1.2451875481245187, | |
| "grad_norm": 2.526304244995117, | |
| "learning_rate": 7.512974579069e-06, | |
| "loss": 0.4783, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 1.2473875261247387, | |
| "grad_norm": 2.2301254272460938, | |
| "learning_rate": 7.50857268625509e-06, | |
| "loss": 0.4975, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 1.2495875041249587, | |
| "grad_norm": 1.8377426862716675, | |
| "learning_rate": 7.50417079344118e-06, | |
| "loss": 0.4929, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 1.2517874821251787, | |
| "grad_norm": 2.6410109996795654, | |
| "learning_rate": 7.49976890062727e-06, | |
| "loss": 0.4816, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 1.2539874601253986, | |
| "grad_norm": 2.0295798778533936, | |
| "learning_rate": 7.49536700781336e-06, | |
| "loss": 0.5038, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.2561874381256186, | |
| "grad_norm": 2.7886478900909424, | |
| "learning_rate": 7.49096511499945e-06, | |
| "loss": 0.5147, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.2583874161258388, | |
| "grad_norm": 2.330388307571411, | |
| "learning_rate": 7.48656322218554e-06, | |
| "loss": 0.4929, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.2605873941260588, | |
| "grad_norm": 1.756525993347168, | |
| "learning_rate": 7.48216132937163e-06, | |
| "loss": 0.4873, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.2627873721262788, | |
| "grad_norm": 1.7345948219299316, | |
| "learning_rate": 7.47775943655772e-06, | |
| "loss": 0.4906, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.2649873501264988, | |
| "grad_norm": 2.1234254837036133, | |
| "learning_rate": 7.47335754374381e-06, | |
| "loss": 0.5082, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.2671873281267187, | |
| "grad_norm": 1.7519376277923584, | |
| "learning_rate": 7.4689556509299e-06, | |
| "loss": 0.5061, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.2693873061269387, | |
| "grad_norm": 2.4111804962158203, | |
| "learning_rate": 7.464553758115991e-06, | |
| "loss": 0.4903, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.2715872841271587, | |
| "grad_norm": 1.9729013442993164, | |
| "learning_rate": 7.460151865302081e-06, | |
| "loss": 0.4881, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.2737872621273787, | |
| "grad_norm": 2.7246460914611816, | |
| "learning_rate": 7.455749972488171e-06, | |
| "loss": 0.517, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.2759872401275987, | |
| "grad_norm": 1.660434603691101, | |
| "learning_rate": 7.451348079674261e-06, | |
| "loss": 0.502, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.2781872181278187, | |
| "grad_norm": 2.782742500305176, | |
| "learning_rate": 7.446946186860351e-06, | |
| "loss": 0.4985, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.2803871961280386, | |
| "grad_norm": 2.264404296875, | |
| "learning_rate": 7.4425442940464405e-06, | |
| "loss": 0.5016, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.2825871741282588, | |
| "grad_norm": 2.0111939907073975, | |
| "learning_rate": 7.438142401232531e-06, | |
| "loss": 0.4905, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.2847871521284788, | |
| "grad_norm": 2.0050606727600098, | |
| "learning_rate": 7.433740508418621e-06, | |
| "loss": 0.4864, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.2869871301286988, | |
| "grad_norm": 1.3107115030288696, | |
| "learning_rate": 7.4293386156047105e-06, | |
| "loss": 0.4915, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.2891871081289188, | |
| "grad_norm": 1.8996055126190186, | |
| "learning_rate": 7.424936722790801e-06, | |
| "loss": 0.49, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 1.2913870861291388, | |
| "grad_norm": 1.7696682214736938, | |
| "learning_rate": 7.420534829976891e-06, | |
| "loss": 0.4968, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 1.2935870641293588, | |
| "grad_norm": 2.1315739154815674, | |
| "learning_rate": 7.4161329371629805e-06, | |
| "loss": 0.4916, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 1.2957870421295787, | |
| "grad_norm": 2.2679789066314697, | |
| "learning_rate": 7.411731044349071e-06, | |
| "loss": 0.5021, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 1.2979870201297987, | |
| "grad_norm": 2.128899097442627, | |
| "learning_rate": 7.407329151535161e-06, | |
| "loss": 0.5148, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.3001869981300187, | |
| "grad_norm": 2.085585832595825, | |
| "learning_rate": 7.4029272587212505e-06, | |
| "loss": 0.4804, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 1.3023869761302387, | |
| "grad_norm": 2.367190361022949, | |
| "learning_rate": 7.398525365907341e-06, | |
| "loss": 0.4921, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 1.3045869541304587, | |
| "grad_norm": 2.3802804946899414, | |
| "learning_rate": 7.394123473093431e-06, | |
| "loss": 0.4974, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 1.3067869321306786, | |
| "grad_norm": 2.332484483718872, | |
| "learning_rate": 7.3897215802795205e-06, | |
| "loss": 0.5115, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 1.3089869101308986, | |
| "grad_norm": 2.1906321048736572, | |
| "learning_rate": 7.385319687465611e-06, | |
| "loss": 0.505, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 1.3111868881311186, | |
| "grad_norm": 1.942108154296875, | |
| "learning_rate": 7.380917794651701e-06, | |
| "loss": 0.4937, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 1.3133868661313386, | |
| "grad_norm": 2.0868446826934814, | |
| "learning_rate": 7.3765159018377905e-06, | |
| "loss": 0.4963, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 1.3155868441315586, | |
| "grad_norm": 2.3469884395599365, | |
| "learning_rate": 7.372114009023881e-06, | |
| "loss": 0.5038, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 1.3177868221317786, | |
| "grad_norm": 2.1203341484069824, | |
| "learning_rate": 7.367712116209971e-06, | |
| "loss": 0.4891, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 1.3199868001319988, | |
| "grad_norm": 1.7752751111984253, | |
| "learning_rate": 7.3633102233960605e-06, | |
| "loss": 0.5036, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.3221867781322187, | |
| "grad_norm": 2.311631441116333, | |
| "learning_rate": 7.358908330582151e-06, | |
| "loss": 0.5104, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 1.3243867561324387, | |
| "grad_norm": 1.9225836992263794, | |
| "learning_rate": 7.354506437768241e-06, | |
| "loss": 0.4926, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 1.3265867341326587, | |
| "grad_norm": 1.9772847890853882, | |
| "learning_rate": 7.3501045449543305e-06, | |
| "loss": 0.4923, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 1.3287867121328787, | |
| "grad_norm": 1.6036473512649536, | |
| "learning_rate": 7.345702652140421e-06, | |
| "loss": 0.4955, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 1.3309866901330987, | |
| "grad_norm": 1.8488271236419678, | |
| "learning_rate": 7.34130075932651e-06, | |
| "loss": 0.512, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 1.3331866681333187, | |
| "grad_norm": 2.149338722229004, | |
| "learning_rate": 7.3368988665126005e-06, | |
| "loss": 0.4914, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 1.3353866461335386, | |
| "grad_norm": 2.4873788356781006, | |
| "learning_rate": 7.332496973698691e-06, | |
| "loss": 0.4965, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 1.3375866241337586, | |
| "grad_norm": 2.4446520805358887, | |
| "learning_rate": 7.32809508088478e-06, | |
| "loss": 0.4917, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 1.3397866021339786, | |
| "grad_norm": 2.2292611598968506, | |
| "learning_rate": 7.3236931880708705e-06, | |
| "loss": 0.4876, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 1.3419865801341986, | |
| "grad_norm": 2.0160257816314697, | |
| "learning_rate": 7.319291295256961e-06, | |
| "loss": 0.4875, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.3441865581344188, | |
| "grad_norm": 2.0969207286834717, | |
| "learning_rate": 7.31488940244305e-06, | |
| "loss": 0.5031, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 1.3463865361346388, | |
| "grad_norm": 2.283207416534424, | |
| "learning_rate": 7.310487509629141e-06, | |
| "loss": 0.4907, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 1.3485865141348587, | |
| "grad_norm": 1.9769617319107056, | |
| "learning_rate": 7.306085616815232e-06, | |
| "loss": 0.4942, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 1.3507864921350787, | |
| "grad_norm": 2.156163454055786, | |
| "learning_rate": 7.301683724001322e-06, | |
| "loss": 0.4992, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 1.3529864701352987, | |
| "grad_norm": 1.6328924894332886, | |
| "learning_rate": 7.297281831187411e-06, | |
| "loss": 0.4861, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 1.3551864481355187, | |
| "grad_norm": 2.365056276321411, | |
| "learning_rate": 7.292879938373502e-06, | |
| "loss": 0.4915, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 1.3573864261357387, | |
| "grad_norm": 2.6308701038360596, | |
| "learning_rate": 7.288478045559592e-06, | |
| "loss": 0.4837, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 1.3595864041359587, | |
| "grad_norm": 2.454827070236206, | |
| "learning_rate": 7.284076152745681e-06, | |
| "loss": 0.4921, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 1.3617863821361786, | |
| "grad_norm": 2.19412899017334, | |
| "learning_rate": 7.279674259931772e-06, | |
| "loss": 0.501, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 1.3639863601363986, | |
| "grad_norm": 2.183582305908203, | |
| "learning_rate": 7.275272367117862e-06, | |
| "loss": 0.4934, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.3661863381366186, | |
| "grad_norm": 2.2355942726135254, | |
| "learning_rate": 7.270870474303951e-06, | |
| "loss": 0.5037, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 1.3683863161368386, | |
| "grad_norm": 1.8665735721588135, | |
| "learning_rate": 7.266468581490042e-06, | |
| "loss": 0.5054, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 1.3705862941370586, | |
| "grad_norm": 2.457763433456421, | |
| "learning_rate": 7.262066688676131e-06, | |
| "loss": 0.4986, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 1.3727862721372786, | |
| "grad_norm": 2.2373385429382324, | |
| "learning_rate": 7.257664795862221e-06, | |
| "loss": 0.4807, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 1.3749862501374985, | |
| "grad_norm": 2.129803419113159, | |
| "learning_rate": 7.253262903048312e-06, | |
| "loss": 0.4877, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 1.3771862281377185, | |
| "grad_norm": 2.2858309745788574, | |
| "learning_rate": 7.248861010234401e-06, | |
| "loss": 0.492, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 1.3793862061379385, | |
| "grad_norm": 2.4332919120788574, | |
| "learning_rate": 7.244459117420491e-06, | |
| "loss": 0.4907, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 1.3815861841381587, | |
| "grad_norm": 1.7995531558990479, | |
| "learning_rate": 7.240057224606582e-06, | |
| "loss": 0.5037, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 1.3837861621383787, | |
| "grad_norm": 2.672942876815796, | |
| "learning_rate": 7.235655331792671e-06, | |
| "loss": 0.4968, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 1.3859861401385987, | |
| "grad_norm": 2.1194186210632324, | |
| "learning_rate": 7.231253438978761e-06, | |
| "loss": 0.4944, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.3881861181388186, | |
| "grad_norm": 2.5758581161499023, | |
| "learning_rate": 7.226851546164852e-06, | |
| "loss": 0.4844, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 1.3903860961390386, | |
| "grad_norm": 2.359781503677368, | |
| "learning_rate": 7.222449653350941e-06, | |
| "loss": 0.4847, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 1.3925860741392586, | |
| "grad_norm": 2.3243279457092285, | |
| "learning_rate": 7.218047760537031e-06, | |
| "loss": 0.4986, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 1.3947860521394786, | |
| "grad_norm": 2.4134695529937744, | |
| "learning_rate": 7.213645867723122e-06, | |
| "loss": 0.4961, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 1.3969860301396986, | |
| "grad_norm": 2.3432512283325195, | |
| "learning_rate": 7.209243974909211e-06, | |
| "loss": 0.5028, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 1.3991860081399186, | |
| "grad_norm": 2.474076747894287, | |
| "learning_rate": 7.204842082095301e-06, | |
| "loss": 0.5004, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 1.4013859861401385, | |
| "grad_norm": 2.43440580368042, | |
| "learning_rate": 7.200440189281392e-06, | |
| "loss": 0.5031, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 1.4035859641403585, | |
| "grad_norm": 2.1737067699432373, | |
| "learning_rate": 7.196038296467481e-06, | |
| "loss": 0.4871, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 1.4057859421405787, | |
| "grad_norm": 1.9419715404510498, | |
| "learning_rate": 7.191636403653571e-06, | |
| "loss": 0.4903, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 1.4079859201407987, | |
| "grad_norm": 2.1449568271636963, | |
| "learning_rate": 7.187234510839662e-06, | |
| "loss": 0.4819, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.4101858981410187, | |
| "grad_norm": 2.1790225505828857, | |
| "learning_rate": 7.182832618025751e-06, | |
| "loss": 0.5155, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 1.4123858761412387, | |
| "grad_norm": 2.4493134021759033, | |
| "learning_rate": 7.178430725211841e-06, | |
| "loss": 0.4922, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 1.4145858541414587, | |
| "grad_norm": 2.250734806060791, | |
| "learning_rate": 7.174028832397932e-06, | |
| "loss": 0.4911, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 1.4167858321416786, | |
| "grad_norm": 2.312277317047119, | |
| "learning_rate": 7.169626939584021e-06, | |
| "loss": 0.4884, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 1.4189858101418986, | |
| "grad_norm": 2.0889904499053955, | |
| "learning_rate": 7.165225046770111e-06, | |
| "loss": 0.5023, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 1.4211857881421186, | |
| "grad_norm": 2.2084124088287354, | |
| "learning_rate": 7.160823153956201e-06, | |
| "loss": 0.4974, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 1.4233857661423386, | |
| "grad_norm": 2.046213150024414, | |
| "learning_rate": 7.156421261142292e-06, | |
| "loss": 0.4935, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 1.4255857441425586, | |
| "grad_norm": 2.1457226276397705, | |
| "learning_rate": 7.152019368328382e-06, | |
| "loss": 0.4903, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 1.4277857221427785, | |
| "grad_norm": 2.058285713195801, | |
| "learning_rate": 7.1476174755144725e-06, | |
| "loss": 0.5002, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 1.4299857001429985, | |
| "grad_norm": 2.269285202026367, | |
| "learning_rate": 7.143215582700562e-06, | |
| "loss": 0.4891, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.4321856781432185, | |
| "grad_norm": 2.030383586883545, | |
| "learning_rate": 7.138813689886652e-06, | |
| "loss": 0.5101, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 1.4343856561434385, | |
| "grad_norm": 2.0629866123199463, | |
| "learning_rate": 7.1344117970727425e-06, | |
| "loss": 0.4931, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 1.4365856341436585, | |
| "grad_norm": 2.064944267272949, | |
| "learning_rate": 7.130009904258832e-06, | |
| "loss": 0.4992, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 1.4387856121438785, | |
| "grad_norm": 2.1032135486602783, | |
| "learning_rate": 7.125608011444922e-06, | |
| "loss": 0.4919, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 1.4409855901440984, | |
| "grad_norm": 2.3275599479675293, | |
| "learning_rate": 7.1212061186310125e-06, | |
| "loss": 0.5119, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 1.4431855681443184, | |
| "grad_norm": 2.2477211952209473, | |
| "learning_rate": 7.116804225817102e-06, | |
| "loss": 0.5092, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 1.4453855461445386, | |
| "grad_norm": 1.8756898641586304, | |
| "learning_rate": 7.112402333003192e-06, | |
| "loss": 0.4977, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 1.4475855241447586, | |
| "grad_norm": 2.839963436126709, | |
| "learning_rate": 7.1080004401892825e-06, | |
| "loss": 0.4939, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 1.4497855021449786, | |
| "grad_norm": 1.8775593042373657, | |
| "learning_rate": 7.103598547375372e-06, | |
| "loss": 0.4851, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 1.4519854801451986, | |
| "grad_norm": 2.1938886642456055, | |
| "learning_rate": 7.099196654561462e-06, | |
| "loss": 0.4797, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.4541854581454186, | |
| "grad_norm": 2.063523769378662, | |
| "learning_rate": 7.0947947617475525e-06, | |
| "loss": 0.4949, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 1.4563854361456385, | |
| "grad_norm": 2.156369924545288, | |
| "learning_rate": 7.090392868933642e-06, | |
| "loss": 0.4936, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 1.4585854141458585, | |
| "grad_norm": 2.4886789321899414, | |
| "learning_rate": 7.085990976119732e-06, | |
| "loss": 0.4979, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 1.4607853921460785, | |
| "grad_norm": 2.3196351528167725, | |
| "learning_rate": 7.081589083305822e-06, | |
| "loss": 0.5121, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 1.4629853701462985, | |
| "grad_norm": 2.057623863220215, | |
| "learning_rate": 7.077187190491912e-06, | |
| "loss": 0.4827, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 1.4651853481465185, | |
| "grad_norm": 1.9187816381454468, | |
| "learning_rate": 7.072785297678002e-06, | |
| "loss": 0.4972, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 1.4673853261467387, | |
| "grad_norm": 1.9243098497390747, | |
| "learning_rate": 7.068383404864092e-06, | |
| "loss": 0.48, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 1.4695853041469586, | |
| "grad_norm": 2.221501111984253, | |
| "learning_rate": 7.063981512050182e-06, | |
| "loss": 0.4817, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 1.4717852821471786, | |
| "grad_norm": 2.145901679992676, | |
| "learning_rate": 7.059579619236272e-06, | |
| "loss": 0.4974, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 1.4739852601473986, | |
| "grad_norm": 2.7018229961395264, | |
| "learning_rate": 7.055177726422362e-06, | |
| "loss": 0.4776, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.4761852381476186, | |
| "grad_norm": 1.826542854309082, | |
| "learning_rate": 7.050775833608452e-06, | |
| "loss": 0.494, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 1.4783852161478386, | |
| "grad_norm": 2.528482437133789, | |
| "learning_rate": 7.046373940794542e-06, | |
| "loss": 0.4804, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 1.4805851941480586, | |
| "grad_norm": 2.3805463314056396, | |
| "learning_rate": 7.041972047980632e-06, | |
| "loss": 0.5, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 1.4827851721482785, | |
| "grad_norm": 2.379004716873169, | |
| "learning_rate": 7.037570155166722e-06, | |
| "loss": 0.5008, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 1.4849851501484985, | |
| "grad_norm": 2.351308584213257, | |
| "learning_rate": 7.033168262352812e-06, | |
| "loss": 0.4917, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 1.4871851281487185, | |
| "grad_norm": 2.390312910079956, | |
| "learning_rate": 7.028766369538902e-06, | |
| "loss": 0.4962, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 1.4893851061489385, | |
| "grad_norm": 2.4329919815063477, | |
| "learning_rate": 7.024364476724992e-06, | |
| "loss": 0.4877, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 1.4915850841491585, | |
| "grad_norm": 2.452253580093384, | |
| "learning_rate": 7.019962583911082e-06, | |
| "loss": 0.4908, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 1.4937850621493785, | |
| "grad_norm": 2.1782665252685547, | |
| "learning_rate": 7.015560691097172e-06, | |
| "loss": 0.4804, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 1.4959850401495984, | |
| "grad_norm": 2.0464863777160645, | |
| "learning_rate": 7.011158798283262e-06, | |
| "loss": 0.4947, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.4981850181498184, | |
| "grad_norm": 1.713578701019287, | |
| "learning_rate": 7.006756905469353e-06, | |
| "loss": 0.4875, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 1.5003849961500384, | |
| "grad_norm": 2.025834560394287, | |
| "learning_rate": 7.002355012655443e-06, | |
| "loss": 0.5027, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 1.5025849741502584, | |
| "grad_norm": 2.509138822555542, | |
| "learning_rate": 6.997953119841533e-06, | |
| "loss": 0.4822, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 1.5047849521504784, | |
| "grad_norm": 2.0234317779541016, | |
| "learning_rate": 6.993551227027623e-06, | |
| "loss": 0.4975, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 1.5069849301506983, | |
| "grad_norm": 2.465769052505493, | |
| "learning_rate": 6.9891493342137125e-06, | |
| "loss": 0.5012, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 1.5091849081509183, | |
| "grad_norm": 2.5200085639953613, | |
| "learning_rate": 6.984747441399803e-06, | |
| "loss": 0.5017, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 1.5113848861511385, | |
| "grad_norm": 2.2190017700195312, | |
| "learning_rate": 6.980345548585893e-06, | |
| "loss": 0.4898, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 1.5135848641513585, | |
| "grad_norm": 2.2302262783050537, | |
| "learning_rate": 6.9759436557719825e-06, | |
| "loss": 0.4989, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 1.5157848421515785, | |
| "grad_norm": 2.4511725902557373, | |
| "learning_rate": 6.971541762958073e-06, | |
| "loss": 0.4934, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 1.5179848201517985, | |
| "grad_norm": 2.3731210231781006, | |
| "learning_rate": 6.967139870144163e-06, | |
| "loss": 0.4724, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.5201847981520185, | |
| "grad_norm": 2.2834906578063965, | |
| "learning_rate": 6.9627379773302525e-06, | |
| "loss": 0.4833, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 1.5223847761522384, | |
| "grad_norm": 2.483689785003662, | |
| "learning_rate": 6.958336084516343e-06, | |
| "loss": 0.4923, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 1.5245847541524584, | |
| "grad_norm": 2.316864490509033, | |
| "learning_rate": 6.953934191702433e-06, | |
| "loss": 0.5233, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 1.5267847321526786, | |
| "grad_norm": 2.1905770301818848, | |
| "learning_rate": 6.9495322988885225e-06, | |
| "loss": 0.5233, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 1.5289847101528986, | |
| "grad_norm": 2.5095105171203613, | |
| "learning_rate": 6.945130406074613e-06, | |
| "loss": 0.4927, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 1.5311846881531186, | |
| "grad_norm": 2.210827112197876, | |
| "learning_rate": 6.940728513260703e-06, | |
| "loss": 0.4965, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 1.5333846661533386, | |
| "grad_norm": 2.6142313480377197, | |
| "learning_rate": 6.9363266204467925e-06, | |
| "loss": 0.5025, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 1.5355846441535586, | |
| "grad_norm": 2.3923892974853516, | |
| "learning_rate": 6.931924727632883e-06, | |
| "loss": 0.4793, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 1.5377846221537785, | |
| "grad_norm": 2.1831846237182617, | |
| "learning_rate": 6.927522834818973e-06, | |
| "loss": 0.4935, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 1.5399846001539985, | |
| "grad_norm": 2.030944347381592, | |
| "learning_rate": 6.9231209420050625e-06, | |
| "loss": 0.494, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.5421845781542185, | |
| "grad_norm": 2.089087724685669, | |
| "learning_rate": 6.918719049191153e-06, | |
| "loss": 0.4989, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 1.5443845561544385, | |
| "grad_norm": 2.7058706283569336, | |
| "learning_rate": 6.914317156377243e-06, | |
| "loss": 0.4982, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 1.5465845341546585, | |
| "grad_norm": 2.312584638595581, | |
| "learning_rate": 6.9099152635633325e-06, | |
| "loss": 0.4981, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 1.5487845121548784, | |
| "grad_norm": 2.5172085762023926, | |
| "learning_rate": 6.905513370749423e-06, | |
| "loss": 0.4871, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 1.5509844901550984, | |
| "grad_norm": 2.035313367843628, | |
| "learning_rate": 6.901111477935512e-06, | |
| "loss": 0.4859, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 1.5531844681553184, | |
| "grad_norm": 2.3374691009521484, | |
| "learning_rate": 6.8967095851216025e-06, | |
| "loss": 0.4831, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 1.5553844461555384, | |
| "grad_norm": 2.2027342319488525, | |
| "learning_rate": 6.892307692307693e-06, | |
| "loss": 0.4974, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 1.5575844241557584, | |
| "grad_norm": 2.4372105598449707, | |
| "learning_rate": 6.887905799493782e-06, | |
| "loss": 0.4902, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 1.5597844021559784, | |
| "grad_norm": 2.320554256439209, | |
| "learning_rate": 6.8835039066798725e-06, | |
| "loss": 0.4917, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 1.5619843801561983, | |
| "grad_norm": 2.323988437652588, | |
| "learning_rate": 6.879102013865963e-06, | |
| "loss": 0.5034, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.5641843581564183, | |
| "grad_norm": 2.111454725265503, | |
| "learning_rate": 6.874700121052052e-06, | |
| "loss": 0.492, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 1.5663843361566383, | |
| "grad_norm": 2.664884328842163, | |
| "learning_rate": 6.8702982282381425e-06, | |
| "loss": 0.4982, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 1.5685843141568583, | |
| "grad_norm": 1.9500539302825928, | |
| "learning_rate": 6.865896335424233e-06, | |
| "loss": 0.5147, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 1.5707842921570783, | |
| "grad_norm": 2.3592636585235596, | |
| "learning_rate": 6.861494442610322e-06, | |
| "loss": 0.4825, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 1.5729842701572985, | |
| "grad_norm": 2.4548308849334717, | |
| "learning_rate": 6.8570925497964125e-06, | |
| "loss": 0.4949, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 1.5751842481575185, | |
| "grad_norm": 2.971724033355713, | |
| "learning_rate": 6.852690656982504e-06, | |
| "loss": 0.4945, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 1.5773842261577384, | |
| "grad_norm": 2.399245023727417, | |
| "learning_rate": 6.848288764168594e-06, | |
| "loss": 0.4888, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 1.5795842041579584, | |
| "grad_norm": 2.2702841758728027, | |
| "learning_rate": 6.843886871354683e-06, | |
| "loss": 0.49, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 1.5817841821581784, | |
| "grad_norm": 1.9252210855484009, | |
| "learning_rate": 6.839484978540774e-06, | |
| "loss": 0.494, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 1.5839841601583984, | |
| "grad_norm": 2.4878454208374023, | |
| "learning_rate": 6.835083085726864e-06, | |
| "loss": 0.4984, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.5861841381586184, | |
| "grad_norm": 2.035708427429199, | |
| "learning_rate": 6.830681192912953e-06, | |
| "loss": 0.4825, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 1.5883841161588386, | |
| "grad_norm": 2.55355167388916, | |
| "learning_rate": 6.826279300099044e-06, | |
| "loss": 0.5056, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 1.5905840941590585, | |
| "grad_norm": 2.4391555786132812, | |
| "learning_rate": 6.821877407285133e-06, | |
| "loss": 0.4928, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 1.5927840721592785, | |
| "grad_norm": 2.2338058948516846, | |
| "learning_rate": 6.817475514471223e-06, | |
| "loss": 0.4874, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 1.5949840501594985, | |
| "grad_norm": 2.7937569618225098, | |
| "learning_rate": 6.813073621657314e-06, | |
| "loss": 0.477, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 1.5971840281597185, | |
| "grad_norm": 2.2559831142425537, | |
| "learning_rate": 6.808671728843403e-06, | |
| "loss": 0.501, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 1.5993840061599385, | |
| "grad_norm": 2.1428000926971436, | |
| "learning_rate": 6.804269836029493e-06, | |
| "loss": 0.4872, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 1.6015839841601585, | |
| "grad_norm": 2.306943655014038, | |
| "learning_rate": 6.799867943215584e-06, | |
| "loss": 0.5002, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 1.6037839621603784, | |
| "grad_norm": 2.3396975994110107, | |
| "learning_rate": 6.795466050401673e-06, | |
| "loss": 0.4951, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 1.6059839401605984, | |
| "grad_norm": 1.8894736766815186, | |
| "learning_rate": 6.791064157587763e-06, | |
| "loss": 0.4872, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 1.6081839181608184, | |
| "grad_norm": 2.0049326419830322, | |
| "learning_rate": 6.786662264773854e-06, | |
| "loss": 0.4877, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 1.6103838961610384, | |
| "grad_norm": 2.3615005016326904, | |
| "learning_rate": 6.782260371959943e-06, | |
| "loss": 0.4925, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 1.6125838741612584, | |
| "grad_norm": 2.386545419692993, | |
| "learning_rate": 6.777858479146033e-06, | |
| "loss": 0.4881, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 1.6147838521614784, | |
| "grad_norm": 2.3752076625823975, | |
| "learning_rate": 6.773456586332124e-06, | |
| "loss": 0.4813, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 1.6169838301616983, | |
| "grad_norm": 2.156837224960327, | |
| "learning_rate": 6.769054693518213e-06, | |
| "loss": 0.4793, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 1.6191838081619183, | |
| "grad_norm": 2.788848638534546, | |
| "learning_rate": 6.764652800704303e-06, | |
| "loss": 0.4946, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 1.6213837861621383, | |
| "grad_norm": 2.1992275714874268, | |
| "learning_rate": 6.760250907890394e-06, | |
| "loss": 0.5019, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 1.6235837641623583, | |
| "grad_norm": 2.664424419403076, | |
| "learning_rate": 6.755849015076483e-06, | |
| "loss": 0.4885, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 1.6257837421625783, | |
| "grad_norm": 2.3380892276763916, | |
| "learning_rate": 6.751447122262573e-06, | |
| "loss": 0.4947, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 1.6279837201627982, | |
| "grad_norm": 2.3588438034057617, | |
| "learning_rate": 6.747045229448664e-06, | |
| "loss": 0.4652, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.6301836981630182, | |
| "grad_norm": 2.6669723987579346, | |
| "learning_rate": 6.742643336634753e-06, | |
| "loss": 0.491, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 1.6323836761632382, | |
| "grad_norm": 2.4595651626586914, | |
| "learning_rate": 6.738241443820843e-06, | |
| "loss": 0.501, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 1.6345836541634584, | |
| "grad_norm": 2.2686636447906494, | |
| "learning_rate": 6.733839551006934e-06, | |
| "loss": 0.482, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 1.6367836321636784, | |
| "grad_norm": 2.4227776527404785, | |
| "learning_rate": 6.729437658193023e-06, | |
| "loss": 0.4958, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 1.6389836101638984, | |
| "grad_norm": 1.9847477674484253, | |
| "learning_rate": 6.725035765379113e-06, | |
| "loss": 0.4834, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 1.6411835881641184, | |
| "grad_norm": 2.6502370834350586, | |
| "learning_rate": 6.720633872565203e-06, | |
| "loss": 0.4815, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 1.6433835661643383, | |
| "grad_norm": 2.2831785678863525, | |
| "learning_rate": 6.716231979751293e-06, | |
| "loss": 0.4826, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 1.6455835441645583, | |
| "grad_norm": 1.8865406513214111, | |
| "learning_rate": 6.711830086937383e-06, | |
| "loss": 0.4986, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 1.6477835221647783, | |
| "grad_norm": 2.026791572570801, | |
| "learning_rate": 6.707428194123473e-06, | |
| "loss": 0.4872, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 1.6499835001649985, | |
| "grad_norm": 2.772639036178589, | |
| "learning_rate": 6.703026301309563e-06, | |
| "loss": 0.4891, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 1.6521834781652185, | |
| "grad_norm": 2.4932167530059814, | |
| "learning_rate": 6.698624408495654e-06, | |
| "loss": 0.4868, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 1.6543834561654385, | |
| "grad_norm": 2.5153396129608154, | |
| "learning_rate": 6.6942225156817445e-06, | |
| "loss": 0.5073, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 1.6565834341656585, | |
| "grad_norm": 1.7845731973648071, | |
| "learning_rate": 6.689820622867834e-06, | |
| "loss": 0.496, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 1.6587834121658784, | |
| "grad_norm": 2.392333745956421, | |
| "learning_rate": 6.685418730053924e-06, | |
| "loss": 0.5044, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 1.6609833901660984, | |
| "grad_norm": 2.624262809753418, | |
| "learning_rate": 6.6810168372400145e-06, | |
| "loss": 0.5196, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 1.6631833681663184, | |
| "grad_norm": 2.421013355255127, | |
| "learning_rate": 6.676614944426104e-06, | |
| "loss": 0.4938, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 1.6653833461665384, | |
| "grad_norm": 2.1836936473846436, | |
| "learning_rate": 6.672213051612194e-06, | |
| "loss": 0.4735, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 1.6675833241667584, | |
| "grad_norm": 2.523780345916748, | |
| "learning_rate": 6.6678111587982845e-06, | |
| "loss": 0.4868, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 1.6697833021669783, | |
| "grad_norm": 3.20668363571167, | |
| "learning_rate": 6.663409265984374e-06, | |
| "loss": 0.4902, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 1.6719832801671983, | |
| "grad_norm": 2.6450743675231934, | |
| "learning_rate": 6.659007373170464e-06, | |
| "loss": 0.4852, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.6741832581674183, | |
| "grad_norm": 2.3257484436035156, | |
| "learning_rate": 6.6546054803565545e-06, | |
| "loss": 0.4913, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 1.6763832361676383, | |
| "grad_norm": 1.7676602602005005, | |
| "learning_rate": 6.650203587542644e-06, | |
| "loss": 0.5051, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 1.6785832141678583, | |
| "grad_norm": 2.2192280292510986, | |
| "learning_rate": 6.645801694728734e-06, | |
| "loss": 0.4959, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 1.6807831921680783, | |
| "grad_norm": 2.4453659057617188, | |
| "learning_rate": 6.641399801914824e-06, | |
| "loss": 0.4841, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 1.6829831701682982, | |
| "grad_norm": 1.9458132982254028, | |
| "learning_rate": 6.636997909100914e-06, | |
| "loss": 0.4911, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 1.6851831481685182, | |
| "grad_norm": 2.2809267044067383, | |
| "learning_rate": 6.632596016287004e-06, | |
| "loss": 0.4871, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 1.6873831261687382, | |
| "grad_norm": 2.630840301513672, | |
| "learning_rate": 6.628194123473094e-06, | |
| "loss": 0.4813, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 1.6895831041689582, | |
| "grad_norm": 2.8288991451263428, | |
| "learning_rate": 6.623792230659184e-06, | |
| "loss": 0.4918, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 1.6917830821691782, | |
| "grad_norm": 2.220552921295166, | |
| "learning_rate": 6.619390337845274e-06, | |
| "loss": 0.4958, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 1.6939830601693981, | |
| "grad_norm": 2.3790931701660156, | |
| "learning_rate": 6.614988445031364e-06, | |
| "loss": 0.5098, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 1.6961830381696184, | |
| "grad_norm": 2.605365753173828, | |
| "learning_rate": 6.610586552217454e-06, | |
| "loss": 0.4999, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 1.6983830161698383, | |
| "grad_norm": 2.526428461074829, | |
| "learning_rate": 6.606184659403544e-06, | |
| "loss": 0.5008, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 1.7005829941700583, | |
| "grad_norm": 2.2195465564727783, | |
| "learning_rate": 6.601782766589634e-06, | |
| "loss": 0.4846, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 1.7027829721702783, | |
| "grad_norm": 2.925656318664551, | |
| "learning_rate": 6.597380873775724e-06, | |
| "loss": 0.4773, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 1.7049829501704983, | |
| "grad_norm": 2.5258848667144775, | |
| "learning_rate": 6.592978980961814e-06, | |
| "loss": 0.4972, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 1.7071829281707183, | |
| "grad_norm": 3.0461318492889404, | |
| "learning_rate": 6.588577088147904e-06, | |
| "loss": 0.4857, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 1.7093829061709382, | |
| "grad_norm": 2.3932976722717285, | |
| "learning_rate": 6.584175195333994e-06, | |
| "loss": 0.4999, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 1.7115828841711584, | |
| "grad_norm": 2.044865369796753, | |
| "learning_rate": 6.579773302520084e-06, | |
| "loss": 0.4898, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 1.7137828621713784, | |
| "grad_norm": 2.366441011428833, | |
| "learning_rate": 6.575371409706174e-06, | |
| "loss": 0.4786, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 1.7159828401715984, | |
| "grad_norm": 2.57084584236145, | |
| "learning_rate": 6.570969516892264e-06, | |
| "loss": 0.4766, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 1.7181828181718184, | |
| "grad_norm": 2.560520887374878, | |
| "learning_rate": 6.566567624078354e-06, | |
| "loss": 0.4891, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 1.7203827961720384, | |
| "grad_norm": 2.1307547092437744, | |
| "learning_rate": 6.562165731264444e-06, | |
| "loss": 0.4852, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 1.7225827741722584, | |
| "grad_norm": 2.4924020767211914, | |
| "learning_rate": 6.557763838450534e-06, | |
| "loss": 0.4836, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 1.7247827521724783, | |
| "grad_norm": 2.323122978210449, | |
| "learning_rate": 6.553361945636624e-06, | |
| "loss": 0.4926, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 1.7269827301726983, | |
| "grad_norm": 2.1391868591308594, | |
| "learning_rate": 6.5489600528227145e-06, | |
| "loss": 0.4974, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 1.7291827081729183, | |
| "grad_norm": 2.2388463020324707, | |
| "learning_rate": 6.544558160008805e-06, | |
| "loss": 0.4825, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 1.7313826861731383, | |
| "grad_norm": 2.617159843444824, | |
| "learning_rate": 6.540156267194895e-06, | |
| "loss": 0.4969, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 1.7335826641733583, | |
| "grad_norm": 1.9445505142211914, | |
| "learning_rate": 6.5357543743809845e-06, | |
| "loss": 0.494, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 1.7357826421735782, | |
| "grad_norm": 1.8033205270767212, | |
| "learning_rate": 6.531352481567075e-06, | |
| "loss": 0.4901, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 1.7379826201737982, | |
| "grad_norm": 2.480191469192505, | |
| "learning_rate": 6.526950588753165e-06, | |
| "loss": 0.4756, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 1.7401825981740182, | |
| "grad_norm": 2.203779697418213, | |
| "learning_rate": 6.5225486959392545e-06, | |
| "loss": 0.4949, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 1.7423825761742382, | |
| "grad_norm": 2.6420180797576904, | |
| "learning_rate": 6.518146803125345e-06, | |
| "loss": 0.476, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 1.7445825541744582, | |
| "grad_norm": 2.4949381351470947, | |
| "learning_rate": 6.513744910311435e-06, | |
| "loss": 0.4805, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 1.7467825321746782, | |
| "grad_norm": 1.6507716178894043, | |
| "learning_rate": 6.5093430174975245e-06, | |
| "loss": 0.4928, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 1.7489825101748981, | |
| "grad_norm": 2.849067211151123, | |
| "learning_rate": 6.504941124683615e-06, | |
| "loss": 0.4879, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 1.7511824881751181, | |
| "grad_norm": 2.404705047607422, | |
| "learning_rate": 6.500539231869705e-06, | |
| "loss": 0.4761, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 1.753382466175338, | |
| "grad_norm": 2.653310537338257, | |
| "learning_rate": 6.4961373390557945e-06, | |
| "loss": 0.5017, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 1.755582444175558, | |
| "grad_norm": 2.31355619430542, | |
| "learning_rate": 6.491735446241885e-06, | |
| "loss": 0.4802, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 1.7577824221757783, | |
| "grad_norm": 2.361945867538452, | |
| "learning_rate": 6.487333553427975e-06, | |
| "loss": 0.4816, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 1.7599824001759983, | |
| "grad_norm": 2.199768304824829, | |
| "learning_rate": 6.4829316606140645e-06, | |
| "loss": 0.4632, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.7621823781762183, | |
| "grad_norm": 1.8634425401687622, | |
| "learning_rate": 6.478529767800155e-06, | |
| "loss": 0.4909, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 1.7643823561764382, | |
| "grad_norm": 2.742694616317749, | |
| "learning_rate": 6.474127874986245e-06, | |
| "loss": 0.4939, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 1.7665823341766582, | |
| "grad_norm": 2.8734514713287354, | |
| "learning_rate": 6.4697259821723345e-06, | |
| "loss": 0.4917, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 1.7687823121768782, | |
| "grad_norm": 2.59197735786438, | |
| "learning_rate": 6.465324089358425e-06, | |
| "loss": 0.4781, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 1.7709822901770982, | |
| "grad_norm": 2.3575127124786377, | |
| "learning_rate": 6.460922196544514e-06, | |
| "loss": 0.4801, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 1.7731822681773184, | |
| "grad_norm": 2.599222421646118, | |
| "learning_rate": 6.4565203037306045e-06, | |
| "loss": 0.4891, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 1.7753822461775384, | |
| "grad_norm": 2.7138659954071045, | |
| "learning_rate": 6.452118410916695e-06, | |
| "loss": 0.491, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 1.7775822241777584, | |
| "grad_norm": 2.467128038406372, | |
| "learning_rate": 6.447716518102784e-06, | |
| "loss": 0.4984, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 1.7797822021779783, | |
| "grad_norm": 2.4047677516937256, | |
| "learning_rate": 6.4433146252888745e-06, | |
| "loss": 0.4756, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 1.7819821801781983, | |
| "grad_norm": 2.0229098796844482, | |
| "learning_rate": 6.438912732474965e-06, | |
| "loss": 0.4792, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 1.7841821581784183, | |
| "grad_norm": 2.463090658187866, | |
| "learning_rate": 6.434510839661054e-06, | |
| "loss": 0.4824, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 1.7863821361786383, | |
| "grad_norm": 2.3522398471832275, | |
| "learning_rate": 6.4301089468471445e-06, | |
| "loss": 0.4938, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 1.7885821141788583, | |
| "grad_norm": 2.1566226482391357, | |
| "learning_rate": 6.425707054033235e-06, | |
| "loss": 0.4858, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 1.7907820921790782, | |
| "grad_norm": 2.452099084854126, | |
| "learning_rate": 6.421305161219324e-06, | |
| "loss": 0.4879, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 1.7929820701792982, | |
| "grad_norm": 2.3728647232055664, | |
| "learning_rate": 6.4169032684054144e-06, | |
| "loss": 0.499, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 1.7951820481795182, | |
| "grad_norm": 2.499342441558838, | |
| "learning_rate": 6.412501375591505e-06, | |
| "loss": 0.4799, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 1.7973820261797382, | |
| "grad_norm": 2.281799077987671, | |
| "learning_rate": 6.408099482777594e-06, | |
| "loss": 0.4823, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 1.7995820041799582, | |
| "grad_norm": 2.5670275688171387, | |
| "learning_rate": 6.4036975899636844e-06, | |
| "loss": 0.4956, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 1.8017819821801782, | |
| "grad_norm": 2.830780506134033, | |
| "learning_rate": 6.399295697149775e-06, | |
| "loss": 0.4909, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 1.8039819601803981, | |
| "grad_norm": 2.3581204414367676, | |
| "learning_rate": 6.394893804335866e-06, | |
| "loss": 0.4906, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 1.8061819381806181, | |
| "grad_norm": 2.6061856746673584, | |
| "learning_rate": 6.390491911521955e-06, | |
| "loss": 0.488, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 1.808381916180838, | |
| "grad_norm": 2.3762636184692383, | |
| "learning_rate": 6.386090018708046e-06, | |
| "loss": 0.4957, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 1.810581894181058, | |
| "grad_norm": 2.7238190174102783, | |
| "learning_rate": 6.381688125894136e-06, | |
| "loss": 0.4866, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 1.812781872181278, | |
| "grad_norm": 2.1085996627807617, | |
| "learning_rate": 6.377286233080225e-06, | |
| "loss": 0.4666, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 1.814981850181498, | |
| "grad_norm": 2.127675771713257, | |
| "learning_rate": 6.372884340266316e-06, | |
| "loss": 0.4975, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 1.817181828181718, | |
| "grad_norm": 2.0977835655212402, | |
| "learning_rate": 6.368482447452405e-06, | |
| "loss": 0.5016, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 1.8193818061819382, | |
| "grad_norm": 2.5928144454956055, | |
| "learning_rate": 6.364080554638495e-06, | |
| "loss": 0.4904, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 1.8215817841821582, | |
| "grad_norm": 2.5363171100616455, | |
| "learning_rate": 6.359678661824586e-06, | |
| "loss": 0.4739, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 1.8237817621823782, | |
| "grad_norm": 1.779845952987671, | |
| "learning_rate": 6.355276769010675e-06, | |
| "loss": 0.475, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 1.8259817401825982, | |
| "grad_norm": 2.3891873359680176, | |
| "learning_rate": 6.350874876196765e-06, | |
| "loss": 0.4867, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 1.8281817181828182, | |
| "grad_norm": 2.5663325786590576, | |
| "learning_rate": 6.3464729833828556e-06, | |
| "loss": 0.4706, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 1.8303816961830381, | |
| "grad_norm": 2.2070469856262207, | |
| "learning_rate": 6.342071090568945e-06, | |
| "loss": 0.4894, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 1.8325816741832581, | |
| "grad_norm": 2.3300230503082275, | |
| "learning_rate": 6.337669197755035e-06, | |
| "loss": 0.4843, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 1.8347816521834783, | |
| "grad_norm": 2.1778311729431152, | |
| "learning_rate": 6.3332673049411256e-06, | |
| "loss": 0.5032, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 1.8369816301836983, | |
| "grad_norm": 2.106933832168579, | |
| "learning_rate": 6.328865412127215e-06, | |
| "loss": 0.4875, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 1.8391816081839183, | |
| "grad_norm": 2.6579482555389404, | |
| "learning_rate": 6.324463519313305e-06, | |
| "loss": 0.4892, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 1.8413815861841383, | |
| "grad_norm": 2.3309366703033447, | |
| "learning_rate": 6.3200616264993956e-06, | |
| "loss": 0.4699, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 1.8435815641843583, | |
| "grad_norm": 2.503455400466919, | |
| "learning_rate": 6.315659733685485e-06, | |
| "loss": 0.4801, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 1.8457815421845782, | |
| "grad_norm": 2.5221006870269775, | |
| "learning_rate": 6.311257840871575e-06, | |
| "loss": 0.4834, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 1.8479815201847982, | |
| "grad_norm": 2.271540403366089, | |
| "learning_rate": 6.3068559480576656e-06, | |
| "loss": 0.4759, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.8501814981850182, | |
| "grad_norm": 2.2240519523620605, | |
| "learning_rate": 6.302454055243755e-06, | |
| "loss": 0.4858, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 1.8523814761852382, | |
| "grad_norm": 2.41463041305542, | |
| "learning_rate": 6.298052162429845e-06, | |
| "loss": 0.4951, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 1.8545814541854582, | |
| "grad_norm": 2.420825242996216, | |
| "learning_rate": 6.2936502696159356e-06, | |
| "loss": 0.4949, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 1.8567814321856781, | |
| "grad_norm": 2.6283483505249023, | |
| "learning_rate": 6.289248376802025e-06, | |
| "loss": 0.4928, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 1.8589814101858981, | |
| "grad_norm": 2.6053175926208496, | |
| "learning_rate": 6.284846483988115e-06, | |
| "loss": 0.4951, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 1.8611813881861181, | |
| "grad_norm": 2.556842803955078, | |
| "learning_rate": 6.280444591174205e-06, | |
| "loss": 0.4766, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 1.863381366186338, | |
| "grad_norm": 2.583364248275757, | |
| "learning_rate": 6.276042698360295e-06, | |
| "loss": 0.4964, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 1.865581344186558, | |
| "grad_norm": 2.407144069671631, | |
| "learning_rate": 6.271640805546385e-06, | |
| "loss": 0.4882, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 1.867781322186778, | |
| "grad_norm": 2.20274019241333, | |
| "learning_rate": 6.267238912732475e-06, | |
| "loss": 0.488, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 1.869981300186998, | |
| "grad_norm": 2.537299871444702, | |
| "learning_rate": 6.262837019918565e-06, | |
| "loss": 0.4912, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 1.872181278187218, | |
| "grad_norm": 2.4242103099823, | |
| "learning_rate": 6.258435127104655e-06, | |
| "loss": 0.4857, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 1.874381256187438, | |
| "grad_norm": 1.9029467105865479, | |
| "learning_rate": 6.254033234290745e-06, | |
| "loss": 0.4969, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 1.876581234187658, | |
| "grad_norm": 3.0369937419891357, | |
| "learning_rate": 6.249631341476835e-06, | |
| "loss": 0.4854, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 1.878781212187878, | |
| "grad_norm": 2.6991753578186035, | |
| "learning_rate": 6.245229448662925e-06, | |
| "loss": 0.4771, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 1.8809811901880982, | |
| "grad_norm": 2.336350679397583, | |
| "learning_rate": 6.240827555849016e-06, | |
| "loss": 0.4922, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 1.8831811681883182, | |
| "grad_norm": 2.731637477874756, | |
| "learning_rate": 6.236425663035106e-06, | |
| "loss": 0.4877, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 1.8853811461885381, | |
| "grad_norm": 2.438896417617798, | |
| "learning_rate": 6.232023770221196e-06, | |
| "loss": 0.4743, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 1.8875811241887581, | |
| "grad_norm": 2.8118035793304443, | |
| "learning_rate": 6.227621877407286e-06, | |
| "loss": 0.4804, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 1.889781102188978, | |
| "grad_norm": 2.5621535778045654, | |
| "learning_rate": 6.223219984593376e-06, | |
| "loss": 0.4849, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 1.891981080189198, | |
| "grad_norm": 2.3240880966186523, | |
| "learning_rate": 6.218818091779466e-06, | |
| "loss": 0.4919, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 1.894181058189418, | |
| "grad_norm": 2.481004238128662, | |
| "learning_rate": 6.214416198965556e-06, | |
| "loss": 0.4794, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 1.8963810361896383, | |
| "grad_norm": 2.4835259914398193, | |
| "learning_rate": 6.210014306151646e-06, | |
| "loss": 0.479, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 1.8985810141898583, | |
| "grad_norm": 2.3219950199127197, | |
| "learning_rate": 6.205612413337736e-06, | |
| "loss": 0.4743, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 1.9007809921900782, | |
| "grad_norm": 2.9407191276550293, | |
| "learning_rate": 6.201210520523826e-06, | |
| "loss": 0.4641, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 1.9029809701902982, | |
| "grad_norm": 2.64907169342041, | |
| "learning_rate": 6.196808627709916e-06, | |
| "loss": 0.4821, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 1.9051809481905182, | |
| "grad_norm": 2.1783690452575684, | |
| "learning_rate": 6.192406734896006e-06, | |
| "loss": 0.4709, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 1.9073809261907382, | |
| "grad_norm": 2.755631685256958, | |
| "learning_rate": 6.1880048420820956e-06, | |
| "loss": 0.4816, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 1.9095809041909582, | |
| "grad_norm": 2.761409044265747, | |
| "learning_rate": 6.183602949268186e-06, | |
| "loss": 0.4833, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 1.9117808821911781, | |
| "grad_norm": 2.676274061203003, | |
| "learning_rate": 6.179201056454276e-06, | |
| "loss": 0.4962, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 1.9139808601913981, | |
| "grad_norm": 2.450660467147827, | |
| "learning_rate": 6.1747991636403656e-06, | |
| "loss": 0.473, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 1.916180838191618, | |
| "grad_norm": 2.693134069442749, | |
| "learning_rate": 6.170397270826456e-06, | |
| "loss": 0.4781, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 1.918380816191838, | |
| "grad_norm": 2.411348581314087, | |
| "learning_rate": 6.165995378012546e-06, | |
| "loss": 0.4804, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 1.920580794192058, | |
| "grad_norm": 2.500234842300415, | |
| "learning_rate": 6.1615934851986356e-06, | |
| "loss": 0.4837, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 1.922780772192278, | |
| "grad_norm": 3.033048391342163, | |
| "learning_rate": 6.157191592384726e-06, | |
| "loss": 0.471, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 1.924980750192498, | |
| "grad_norm": 1.847033143043518, | |
| "learning_rate": 6.152789699570816e-06, | |
| "loss": 0.4823, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 1.927180728192718, | |
| "grad_norm": 2.5302257537841797, | |
| "learning_rate": 6.1483878067569056e-06, | |
| "loss": 0.4826, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 1.929380706192938, | |
| "grad_norm": 1.998494029045105, | |
| "learning_rate": 6.143985913942996e-06, | |
| "loss": 0.4891, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 1.931580684193158, | |
| "grad_norm": 2.995784044265747, | |
| "learning_rate": 6.139584021129086e-06, | |
| "loss": 0.4847, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 1.933780662193378, | |
| "grad_norm": 2.2645761966705322, | |
| "learning_rate": 6.1351821283151756e-06, | |
| "loss": 0.5042, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 1.935980640193598, | |
| "grad_norm": 2.3474481105804443, | |
| "learning_rate": 6.130780235501266e-06, | |
| "loss": 0.4845, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 1.938180618193818, | |
| "grad_norm": 2.570206880569458, | |
| "learning_rate": 6.126378342687356e-06, | |
| "loss": 0.4794, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 1.940380596194038, | |
| "grad_norm": 1.8715978860855103, | |
| "learning_rate": 6.1219764498734456e-06, | |
| "loss": 0.4775, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 1.942580574194258, | |
| "grad_norm": 2.443993330001831, | |
| "learning_rate": 6.117574557059536e-06, | |
| "loss": 0.4824, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 1.944780552194478, | |
| "grad_norm": 2.4730186462402344, | |
| "learning_rate": 6.113172664245626e-06, | |
| "loss": 0.4914, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 1.946980530194698, | |
| "grad_norm": 2.6471264362335205, | |
| "learning_rate": 6.1087707714317156e-06, | |
| "loss": 0.4826, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 1.949180508194918, | |
| "grad_norm": 2.5795907974243164, | |
| "learning_rate": 6.104368878617806e-06, | |
| "loss": 0.4871, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 1.951380486195138, | |
| "grad_norm": 2.3072896003723145, | |
| "learning_rate": 6.099966985803895e-06, | |
| "loss": 0.4937, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 1.953580464195358, | |
| "grad_norm": 2.5398294925689697, | |
| "learning_rate": 6.0955650929899856e-06, | |
| "loss": 0.4919, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 1.955780442195578, | |
| "grad_norm": 2.15952730178833, | |
| "learning_rate": 6.091163200176077e-06, | |
| "loss": 0.4934, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 1.957980420195798, | |
| "grad_norm": 2.4487977027893066, | |
| "learning_rate": 6.086761307362167e-06, | |
| "loss": 0.4842, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 1.9601803981960182, | |
| "grad_norm": 2.4906442165374756, | |
| "learning_rate": 6.082359414548256e-06, | |
| "loss": 0.484, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 1.9623803761962382, | |
| "grad_norm": 2.605121374130249, | |
| "learning_rate": 6.077957521734347e-06, | |
| "loss": 0.4903, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 1.9645803541964582, | |
| "grad_norm": 2.7144834995269775, | |
| "learning_rate": 6.073555628920437e-06, | |
| "loss": 0.4931, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 1.9667803321966781, | |
| "grad_norm": 2.7881131172180176, | |
| "learning_rate": 6.069153736106526e-06, | |
| "loss": 0.495, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 1.9689803101968981, | |
| "grad_norm": 3.044265031814575, | |
| "learning_rate": 6.064751843292617e-06, | |
| "loss": 0.4757, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 1.971180288197118, | |
| "grad_norm": 2.3652849197387695, | |
| "learning_rate": 6.060349950478707e-06, | |
| "loss": 0.4761, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 1.973380266197338, | |
| "grad_norm": 1.9909372329711914, | |
| "learning_rate": 6.055948057664796e-06, | |
| "loss": 0.492, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 1.975580244197558, | |
| "grad_norm": 2.1215572357177734, | |
| "learning_rate": 6.051546164850887e-06, | |
| "loss": 0.4787, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 1.977780222197778, | |
| "grad_norm": 2.807328701019287, | |
| "learning_rate": 6.047144272036977e-06, | |
| "loss": 0.4845, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 1.979980200197998, | |
| "grad_norm": 2.344365358352661, | |
| "learning_rate": 6.042742379223066e-06, | |
| "loss": 0.4892, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 1.982180178198218, | |
| "grad_norm": 2.1772940158843994, | |
| "learning_rate": 6.038340486409157e-06, | |
| "loss": 0.4849, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 1.984380156198438, | |
| "grad_norm": 2.4292235374450684, | |
| "learning_rate": 6.033938593595247e-06, | |
| "loss": 0.4869, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 1.986580134198658, | |
| "grad_norm": 2.350494861602783, | |
| "learning_rate": 6.029536700781336e-06, | |
| "loss": 0.4945, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 1.988780112198878, | |
| "grad_norm": 2.447011709213257, | |
| "learning_rate": 6.025134807967427e-06, | |
| "loss": 0.4632, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 1.990980090199098, | |
| "grad_norm": 2.229335069656372, | |
| "learning_rate": 6.020732915153516e-06, | |
| "loss": 0.491, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 1.993180068199318, | |
| "grad_norm": 2.659064292907715, | |
| "learning_rate": 6.016331022339606e-06, | |
| "loss": 0.4788, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 1.995380046199538, | |
| "grad_norm": 2.435239791870117, | |
| "learning_rate": 6.011929129525697e-06, | |
| "loss": 0.4947, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 1.9975800241997579, | |
| "grad_norm": 2.0373647212982178, | |
| "learning_rate": 6.007527236711786e-06, | |
| "loss": 0.4832, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 1.9997800021999779, | |
| "grad_norm": 2.644747734069824, | |
| "learning_rate": 6.003125343897876e-06, | |
| "loss": 0.4884, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 2.001979980200198, | |
| "grad_norm": 2.4957003593444824, | |
| "learning_rate": 5.998723451083967e-06, | |
| "loss": 0.4441, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 2.004179958200418, | |
| "grad_norm": 2.8672921657562256, | |
| "learning_rate": 5.994321558270056e-06, | |
| "loss": 0.4586, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 2.006379936200638, | |
| "grad_norm": 2.2238707542419434, | |
| "learning_rate": 5.989919665456146e-06, | |
| "loss": 0.4508, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 2.008579914200858, | |
| "grad_norm": 3.085266590118408, | |
| "learning_rate": 5.985517772642237e-06, | |
| "loss": 0.4454, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 2.010779892201078, | |
| "grad_norm": 2.7190568447113037, | |
| "learning_rate": 5.981115879828326e-06, | |
| "loss": 0.4421, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 2.012979870201298, | |
| "grad_norm": 2.966407537460327, | |
| "learning_rate": 5.976713987014416e-06, | |
| "loss": 0.4334, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 2.015179848201518, | |
| "grad_norm": 2.963914394378662, | |
| "learning_rate": 5.972312094200507e-06, | |
| "loss": 0.4428, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 2.017379826201738, | |
| "grad_norm": 3.2475080490112305, | |
| "learning_rate": 5.967910201386596e-06, | |
| "loss": 0.4387, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 2.019579804201958, | |
| "grad_norm": 2.248386859893799, | |
| "learning_rate": 5.963508308572686e-06, | |
| "loss": 0.4509, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 2.021779782202178, | |
| "grad_norm": 2.9276363849639893, | |
| "learning_rate": 5.959106415758777e-06, | |
| "loss": 0.4509, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 2.023979760202398, | |
| "grad_norm": 3.2354319095611572, | |
| "learning_rate": 5.954704522944866e-06, | |
| "loss": 0.4396, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 2.026179738202618, | |
| "grad_norm": 3.478252649307251, | |
| "learning_rate": 5.950302630130956e-06, | |
| "loss": 0.454, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 2.028379716202838, | |
| "grad_norm": 2.1570658683776855, | |
| "learning_rate": 5.945900737317047e-06, | |
| "loss": 0.4426, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 2.030579694203058, | |
| "grad_norm": 3.555510997772217, | |
| "learning_rate": 5.941498844503136e-06, | |
| "loss": 0.4278, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 2.032779672203278, | |
| "grad_norm": 3.0837221145629883, | |
| "learning_rate": 5.937096951689227e-06, | |
| "loss": 0.4582, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 2.034979650203498, | |
| "grad_norm": 3.023439407348633, | |
| "learning_rate": 5.9326950588753175e-06, | |
| "loss": 0.445, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 2.037179628203718, | |
| "grad_norm": 2.8164618015289307, | |
| "learning_rate": 5.928293166061407e-06, | |
| "loss": 0.4474, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 2.039379606203938, | |
| "grad_norm": 2.4497897624969482, | |
| "learning_rate": 5.923891273247497e-06, | |
| "loss": 0.4581, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 2.041579584204158, | |
| "grad_norm": 2.560822010040283, | |
| "learning_rate": 5.9194893804335875e-06, | |
| "loss": 0.4402, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 2.043779562204378, | |
| "grad_norm": 2.457819938659668, | |
| "learning_rate": 5.915087487619677e-06, | |
| "loss": 0.457, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 2.045979540204598, | |
| "grad_norm": 2.840198278427124, | |
| "learning_rate": 5.910685594805767e-06, | |
| "loss": 0.4457, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 2.048179518204818, | |
| "grad_norm": 3.289562940597534, | |
| "learning_rate": 5.9062837019918575e-06, | |
| "loss": 0.4458, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 2.050379496205038, | |
| "grad_norm": 3.20574688911438, | |
| "learning_rate": 5.901881809177947e-06, | |
| "loss": 0.4464, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 2.052579474205258, | |
| "grad_norm": 3.1382062435150146, | |
| "learning_rate": 5.897479916364037e-06, | |
| "loss": 0.4407, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 2.054779452205478, | |
| "grad_norm": 2.4946656227111816, | |
| "learning_rate": 5.8930780235501275e-06, | |
| "loss": 0.4404, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 2.056979430205698, | |
| "grad_norm": 3.4237630367279053, | |
| "learning_rate": 5.888676130736217e-06, | |
| "loss": 0.4549, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 2.059179408205918, | |
| "grad_norm": 2.6181180477142334, | |
| "learning_rate": 5.884274237922307e-06, | |
| "loss": 0.4305, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 2.061379386206138, | |
| "grad_norm": 2.9076225757598877, | |
| "learning_rate": 5.8798723451083975e-06, | |
| "loss": 0.4543, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 2.063579364206358, | |
| "grad_norm": 2.6111700534820557, | |
| "learning_rate": 5.875470452294487e-06, | |
| "loss": 0.4426, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 2.0657793422065778, | |
| "grad_norm": 3.1381430625915527, | |
| "learning_rate": 5.871068559480577e-06, | |
| "loss": 0.4509, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 2.0679793202067978, | |
| "grad_norm": 2.934509754180908, | |
| "learning_rate": 5.8666666666666675e-06, | |
| "loss": 0.4538, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 2.0701792982070177, | |
| "grad_norm": 2.8510279655456543, | |
| "learning_rate": 5.862264773852757e-06, | |
| "loss": 0.4396, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 2.0723792762072377, | |
| "grad_norm": 2.753408670425415, | |
| "learning_rate": 5.857862881038847e-06, | |
| "loss": 0.4498, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 2.0745792542074577, | |
| "grad_norm": 2.5191516876220703, | |
| "learning_rate": 5.8534609882249375e-06, | |
| "loss": 0.4355, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 2.076779232207678, | |
| "grad_norm": 3.058117628097534, | |
| "learning_rate": 5.849059095411027e-06, | |
| "loss": 0.4496, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 2.078979210207898, | |
| "grad_norm": 2.3892626762390137, | |
| "learning_rate": 5.844657202597117e-06, | |
| "loss": 0.448, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 2.081179188208118, | |
| "grad_norm": 3.303252935409546, | |
| "learning_rate": 5.840255309783207e-06, | |
| "loss": 0.4423, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 2.083379166208338, | |
| "grad_norm": 2.571668863296509, | |
| "learning_rate": 5.835853416969297e-06, | |
| "loss": 0.4477, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 2.085579144208558, | |
| "grad_norm": 2.8675763607025146, | |
| "learning_rate": 5.831451524155387e-06, | |
| "loss": 0.4402, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 2.087779122208778, | |
| "grad_norm": 1.920617938041687, | |
| "learning_rate": 5.827049631341477e-06, | |
| "loss": 0.4469, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 2.089979100208998, | |
| "grad_norm": 2.4607462882995605, | |
| "learning_rate": 5.822647738527567e-06, | |
| "loss": 0.4578, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 2.092179078209218, | |
| "grad_norm": 2.3950858116149902, | |
| "learning_rate": 5.818245845713657e-06, | |
| "loss": 0.449, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 2.094379056209438, | |
| "grad_norm": 2.5188486576080322, | |
| "learning_rate": 5.813843952899747e-06, | |
| "loss": 0.4411, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 2.096579034209658, | |
| "grad_norm": 2.665241003036499, | |
| "learning_rate": 5.809442060085837e-06, | |
| "loss": 0.4555, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 2.098779012209878, | |
| "grad_norm": 3.0195603370666504, | |
| "learning_rate": 5.805040167271927e-06, | |
| "loss": 0.4605, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 2.100978990210098, | |
| "grad_norm": 3.2705276012420654, | |
| "learning_rate": 5.800638274458017e-06, | |
| "loss": 0.437, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 2.103178968210318, | |
| "grad_norm": 2.4358837604522705, | |
| "learning_rate": 5.796236381644107e-06, | |
| "loss": 0.4556, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 2.105378946210538, | |
| "grad_norm": 2.609314203262329, | |
| "learning_rate": 5.791834488830197e-06, | |
| "loss": 0.4396, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 2.107578924210758, | |
| "grad_norm": 2.715202808380127, | |
| "learning_rate": 5.787432596016287e-06, | |
| "loss": 0.4409, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 2.109778902210978, | |
| "grad_norm": 2.89326548576355, | |
| "learning_rate": 5.783030703202378e-06, | |
| "loss": 0.4473, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 2.111978880211198, | |
| "grad_norm": 2.722426414489746, | |
| "learning_rate": 5.778628810388468e-06, | |
| "loss": 0.4392, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 2.114178858211418, | |
| "grad_norm": 2.5516304969787598, | |
| "learning_rate": 5.774226917574558e-06, | |
| "loss": 0.4327, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 2.116378836211638, | |
| "grad_norm": 1.6953123807907104, | |
| "learning_rate": 5.769825024760648e-06, | |
| "loss": 0.4354, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 2.118578814211858, | |
| "grad_norm": 3.260712146759033, | |
| "learning_rate": 5.765423131946738e-06, | |
| "loss": 0.4587, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 2.1207787922120778, | |
| "grad_norm": 3.15496826171875, | |
| "learning_rate": 5.761021239132828e-06, | |
| "loss": 0.4455, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 2.1229787702122977, | |
| "grad_norm": 3.02713680267334, | |
| "learning_rate": 5.756619346318918e-06, | |
| "loss": 0.443, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 2.1251787482125177, | |
| "grad_norm": 2.6551177501678467, | |
| "learning_rate": 5.752217453505008e-06, | |
| "loss": 0.4361, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 2.1273787262127377, | |
| "grad_norm": 3.143676996231079, | |
| "learning_rate": 5.7478155606910975e-06, | |
| "loss": 0.4463, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 2.1295787042129577, | |
| "grad_norm": 3.07769775390625, | |
| "learning_rate": 5.743413667877188e-06, | |
| "loss": 0.4563, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 2.1317786822131777, | |
| "grad_norm": 2.862227439880371, | |
| "learning_rate": 5.739011775063278e-06, | |
| "loss": 0.4393, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 2.1339786602133977, | |
| "grad_norm": 2.652214288711548, | |
| "learning_rate": 5.7346098822493675e-06, | |
| "loss": 0.443, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 2.136178638213618, | |
| "grad_norm": 2.3733363151550293, | |
| "learning_rate": 5.730207989435458e-06, | |
| "loss": 0.4449, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 2.138378616213838, | |
| "grad_norm": 2.734473705291748, | |
| "learning_rate": 5.725806096621548e-06, | |
| "loss": 0.4357, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 2.140578594214058, | |
| "grad_norm": 2.783421039581299, | |
| "learning_rate": 5.7214042038076375e-06, | |
| "loss": 0.434, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 2.142778572214278, | |
| "grad_norm": 2.4740219116210938, | |
| "learning_rate": 5.717002310993728e-06, | |
| "loss": 0.4417, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 2.144978550214498, | |
| "grad_norm": 2.809589147567749, | |
| "learning_rate": 5.712600418179818e-06, | |
| "loss": 0.4507, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 2.147178528214718, | |
| "grad_norm": 2.179594039916992, | |
| "learning_rate": 5.7081985253659075e-06, | |
| "loss": 0.4552, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 2.149378506214938, | |
| "grad_norm": 2.5812172889709473, | |
| "learning_rate": 5.703796632551998e-06, | |
| "loss": 0.4462, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 2.151578484215158, | |
| "grad_norm": 2.6970343589782715, | |
| "learning_rate": 5.699394739738088e-06, | |
| "loss": 0.4448, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 2.153778462215378, | |
| "grad_norm": 3.2081048488616943, | |
| "learning_rate": 5.6949928469241775e-06, | |
| "loss": 0.4477, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 2.155978440215598, | |
| "grad_norm": 2.283027410507202, | |
| "learning_rate": 5.690590954110268e-06, | |
| "loss": 0.4554, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 2.158178418215818, | |
| "grad_norm": 2.4790256023406982, | |
| "learning_rate": 5.686189061296358e-06, | |
| "loss": 0.4443, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 2.160378396216038, | |
| "grad_norm": 3.0653131008148193, | |
| "learning_rate": 5.6817871684824475e-06, | |
| "loss": 0.4435, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 2.162578374216258, | |
| "grad_norm": 3.14249849319458, | |
| "learning_rate": 5.677385275668538e-06, | |
| "loss": 0.4528, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 2.164778352216478, | |
| "grad_norm": 3.3730337619781494, | |
| "learning_rate": 5.672983382854628e-06, | |
| "loss": 0.4397, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 2.166978330216698, | |
| "grad_norm": 3.2641589641571045, | |
| "learning_rate": 5.6685814900407175e-06, | |
| "loss": 0.4365, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 2.169178308216918, | |
| "grad_norm": 3.698474407196045, | |
| "learning_rate": 5.664179597226808e-06, | |
| "loss": 0.4416, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 2.171378286217138, | |
| "grad_norm": 2.253495454788208, | |
| "learning_rate": 5.659777704412897e-06, | |
| "loss": 0.4534, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 2.173578264217358, | |
| "grad_norm": 3.342864990234375, | |
| "learning_rate": 5.6553758115989875e-06, | |
| "loss": 0.4546, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 2.1757782422175778, | |
| "grad_norm": 2.818357229232788, | |
| "learning_rate": 5.650973918785078e-06, | |
| "loss": 0.4327, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 2.1779782202177977, | |
| "grad_norm": 3.623086452484131, | |
| "learning_rate": 5.646572025971167e-06, | |
| "loss": 0.4566, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 2.1801781982180177, | |
| "grad_norm": 3.0294673442840576, | |
| "learning_rate": 5.6421701331572575e-06, | |
| "loss": 0.4437, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 2.1823781762182377, | |
| "grad_norm": 2.562649726867676, | |
| "learning_rate": 5.637768240343348e-06, | |
| "loss": 0.4504, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 2.1845781542184577, | |
| "grad_norm": 2.9399819374084473, | |
| "learning_rate": 5.633366347529439e-06, | |
| "loss": 0.4405, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 2.1867781322186777, | |
| "grad_norm": 2.589012861251831, | |
| "learning_rate": 5.628964454715528e-06, | |
| "loss": 0.4332, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 2.1889781102188977, | |
| "grad_norm": 3.24257230758667, | |
| "learning_rate": 5.624562561901619e-06, | |
| "loss": 0.4486, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 2.1911780882191176, | |
| "grad_norm": 2.6864874362945557, | |
| "learning_rate": 5.620160669087709e-06, | |
| "loss": 0.4476, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 2.1933780662193376, | |
| "grad_norm": 2.183894634246826, | |
| "learning_rate": 5.615758776273798e-06, | |
| "loss": 0.4517, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 2.1955780442195576, | |
| "grad_norm": 2.297757625579834, | |
| "learning_rate": 5.611356883459889e-06, | |
| "loss": 0.4414, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 2.1977780222197776, | |
| "grad_norm": 2.6887316703796387, | |
| "learning_rate": 5.606954990645979e-06, | |
| "loss": 0.4359, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 2.199978000219998, | |
| "grad_norm": 2.8383491039276123, | |
| "learning_rate": 5.602553097832068e-06, | |
| "loss": 0.4455, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 2.199978000219998, | |
| "eval_loss": 0.5539576411247253, | |
| "eval_runtime": 386.4228, | |
| "eval_samples_per_second": 155.27, | |
| "eval_steps_per_second": 4.852, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 2.202177978220218, | |
| "grad_norm": 2.4842607975006104, | |
| "learning_rate": 5.598151205018159e-06, | |
| "loss": 0.4421, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 2.204377956220438, | |
| "grad_norm": 2.3061771392822266, | |
| "learning_rate": 5.593749312204249e-06, | |
| "loss": 0.4529, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 2.206577934220658, | |
| "grad_norm": 2.9890830516815186, | |
| "learning_rate": 5.589347419390338e-06, | |
| "loss": 0.4251, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 2.208777912220878, | |
| "grad_norm": 2.5472826957702637, | |
| "learning_rate": 5.584945526576429e-06, | |
| "loss": 0.4384, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 2.210977890221098, | |
| "grad_norm": 3.314694881439209, | |
| "learning_rate": 5.580543633762519e-06, | |
| "loss": 0.4372, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 2.213177868221318, | |
| "grad_norm": 3.1046979427337646, | |
| "learning_rate": 5.576141740948608e-06, | |
| "loss": 0.434, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 2.215377846221538, | |
| "grad_norm": 2.180180788040161, | |
| "learning_rate": 5.571739848134699e-06, | |
| "loss": 0.43, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 2.217577824221758, | |
| "grad_norm": 3.7238945960998535, | |
| "learning_rate": 5.567337955320788e-06, | |
| "loss": 0.4404, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 2.219777802221978, | |
| "grad_norm": 3.2101945877075195, | |
| "learning_rate": 5.562936062506878e-06, | |
| "loss": 0.4393, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 2.221977780222198, | |
| "grad_norm": 2.822737455368042, | |
| "learning_rate": 5.558534169692969e-06, | |
| "loss": 0.4407, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 2.224177758222418, | |
| "grad_norm": 2.736593723297119, | |
| "learning_rate": 5.554132276879058e-06, | |
| "loss": 0.4603, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 2.226377736222638, | |
| "grad_norm": 2.5259158611297607, | |
| "learning_rate": 5.549730384065148e-06, | |
| "loss": 0.438, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 2.2285777142228578, | |
| "grad_norm": 2.8023760318756104, | |
| "learning_rate": 5.545328491251239e-06, | |
| "loss": 0.4476, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 2.2307776922230778, | |
| "grad_norm": 3.469649076461792, | |
| "learning_rate": 5.540926598437328e-06, | |
| "loss": 0.4498, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 2.2329776702232977, | |
| "grad_norm": 2.2170920372009277, | |
| "learning_rate": 5.536524705623418e-06, | |
| "loss": 0.4531, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 2.2351776482235177, | |
| "grad_norm": 2.9399514198303223, | |
| "learning_rate": 5.532122812809509e-06, | |
| "loss": 0.4496, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 2.2373776262237377, | |
| "grad_norm": 3.1350746154785156, | |
| "learning_rate": 5.527720919995598e-06, | |
| "loss": 0.4412, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 2.2395776042239577, | |
| "grad_norm": 2.7231826782226562, | |
| "learning_rate": 5.523319027181688e-06, | |
| "loss": 0.4434, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 2.2417775822241777, | |
| "grad_norm": 2.8241002559661865, | |
| "learning_rate": 5.518917134367779e-06, | |
| "loss": 0.4405, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 2.2439775602243976, | |
| "grad_norm": 2.6854066848754883, | |
| "learning_rate": 5.514515241553868e-06, | |
| "loss": 0.4558, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 2.2461775382246176, | |
| "grad_norm": 3.1952197551727295, | |
| "learning_rate": 5.510113348739958e-06, | |
| "loss": 0.4354, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 2.2483775162248376, | |
| "grad_norm": 2.9026472568511963, | |
| "learning_rate": 5.505711455926049e-06, | |
| "loss": 0.4485, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 2.2505774942250576, | |
| "grad_norm": 3.1712558269500732, | |
| "learning_rate": 5.501309563112138e-06, | |
| "loss": 0.4468, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 2.2527774722252776, | |
| "grad_norm": 2.9717068672180176, | |
| "learning_rate": 5.496907670298228e-06, | |
| "loss": 0.4386, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 2.2549774502254976, | |
| "grad_norm": 2.8104095458984375, | |
| "learning_rate": 5.492505777484319e-06, | |
| "loss": 0.4452, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 2.2571774282257175, | |
| "grad_norm": 3.142512798309326, | |
| "learning_rate": 5.488103884670408e-06, | |
| "loss": 0.4487, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 2.259377406225938, | |
| "grad_norm": 3.723659038543701, | |
| "learning_rate": 5.483701991856498e-06, | |
| "loss": 0.449, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 2.2615773842261575, | |
| "grad_norm": 3.365520477294922, | |
| "learning_rate": 5.4793000990425895e-06, | |
| "loss": 0.4409, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 2.263777362226378, | |
| "grad_norm": 2.1158196926116943, | |
| "learning_rate": 5.474898206228679e-06, | |
| "loss": 0.4526, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 2.265977340226598, | |
| "grad_norm": 2.77187442779541, | |
| "learning_rate": 5.470496313414769e-06, | |
| "loss": 0.4597, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 2.268177318226818, | |
| "grad_norm": 3.1668035984039307, | |
| "learning_rate": 5.4660944206008595e-06, | |
| "loss": 0.4515, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 2.270377296227038, | |
| "grad_norm": 3.3199713230133057, | |
| "learning_rate": 5.461692527786949e-06, | |
| "loss": 0.4421, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 2.272577274227258, | |
| "grad_norm": 3.0452702045440674, | |
| "learning_rate": 5.457290634973039e-06, | |
| "loss": 0.451, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 2.274777252227478, | |
| "grad_norm": 2.889191150665283, | |
| "learning_rate": 5.4528887421591295e-06, | |
| "loss": 0.4433, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 2.276977230227698, | |
| "grad_norm": 3.1005496978759766, | |
| "learning_rate": 5.448486849345219e-06, | |
| "loss": 0.459, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 2.279177208227918, | |
| "grad_norm": 3.024289131164551, | |
| "learning_rate": 5.444084956531309e-06, | |
| "loss": 0.4369, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 2.281377186228138, | |
| "grad_norm": 2.3427116870880127, | |
| "learning_rate": 5.4396830637173995e-06, | |
| "loss": 0.4461, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 2.2835771642283578, | |
| "grad_norm": 3.6452486515045166, | |
| "learning_rate": 5.435281170903489e-06, | |
| "loss": 0.4626, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 2.2857771422285778, | |
| "grad_norm": 3.5883066654205322, | |
| "learning_rate": 5.430879278089579e-06, | |
| "loss": 0.4439, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 2.2879771202287977, | |
| "grad_norm": 3.1896305084228516, | |
| "learning_rate": 5.4264773852756695e-06, | |
| "loss": 0.4342, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 2.2901770982290177, | |
| "grad_norm": 3.0149104595184326, | |
| "learning_rate": 5.422075492461759e-06, | |
| "loss": 0.4503, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 2.2923770762292377, | |
| "grad_norm": 3.1118035316467285, | |
| "learning_rate": 5.417673599647849e-06, | |
| "loss": 0.4402, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 2.2945770542294577, | |
| "grad_norm": 3.0000152587890625, | |
| "learning_rate": 5.4132717068339395e-06, | |
| "loss": 0.4321, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 2.2967770322296777, | |
| "grad_norm": 3.1988613605499268, | |
| "learning_rate": 5.408869814020029e-06, | |
| "loss": 0.4458, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 2.2989770102298976, | |
| "grad_norm": 2.5336127281188965, | |
| "learning_rate": 5.404467921206119e-06, | |
| "loss": 0.4412, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 2.3011769882301176, | |
| "grad_norm": 2.478907823562622, | |
| "learning_rate": 5.400066028392209e-06, | |
| "loss": 0.4391, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 2.3033769662303376, | |
| "grad_norm": 3.0680346488952637, | |
| "learning_rate": 5.395664135578299e-06, | |
| "loss": 0.4466, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 2.3055769442305576, | |
| "grad_norm": 2.976754665374756, | |
| "learning_rate": 5.391262242764389e-06, | |
| "loss": 0.4534, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 2.3077769222307776, | |
| "grad_norm": 2.921550989151001, | |
| "learning_rate": 5.386860349950479e-06, | |
| "loss": 0.4461, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 2.3099769002309976, | |
| "grad_norm": 2.6085400581359863, | |
| "learning_rate": 5.382458457136569e-06, | |
| "loss": 0.439, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 2.3121768782312175, | |
| "grad_norm": 3.231365203857422, | |
| "learning_rate": 5.378056564322659e-06, | |
| "loss": 0.4511, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 2.3143768562314375, | |
| "grad_norm": 3.2471604347229004, | |
| "learning_rate": 5.373654671508749e-06, | |
| "loss": 0.434, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 2.3165768342316575, | |
| "grad_norm": 3.265526056289673, | |
| "learning_rate": 5.369252778694839e-06, | |
| "loss": 0.4414, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 2.318776812231878, | |
| "grad_norm": 2.632627010345459, | |
| "learning_rate": 5.364850885880929e-06, | |
| "loss": 0.4469, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 2.3209767902320975, | |
| "grad_norm": 3.3575692176818848, | |
| "learning_rate": 5.360448993067019e-06, | |
| "loss": 0.4517, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 2.323176768232318, | |
| "grad_norm": 2.751236915588379, | |
| "learning_rate": 5.356047100253109e-06, | |
| "loss": 0.4321, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 2.3253767462325374, | |
| "grad_norm": 3.4512314796447754, | |
| "learning_rate": 5.351645207439199e-06, | |
| "loss": 0.4513, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 2.327576724232758, | |
| "grad_norm": 2.5892844200134277, | |
| "learning_rate": 5.347243314625289e-06, | |
| "loss": 0.448, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 2.329776702232978, | |
| "grad_norm": 3.1810543537139893, | |
| "learning_rate": 5.342841421811379e-06, | |
| "loss": 0.4489, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 2.331976680233198, | |
| "grad_norm": 3.527425765991211, | |
| "learning_rate": 5.338439528997469e-06, | |
| "loss": 0.4362, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 2.334176658233418, | |
| "grad_norm": 2.97705340385437, | |
| "learning_rate": 5.334037636183559e-06, | |
| "loss": 0.4424, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 2.336376636233638, | |
| "grad_norm": 2.3554928302764893, | |
| "learning_rate": 5.329635743369649e-06, | |
| "loss": 0.4354, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 2.3385766142338578, | |
| "grad_norm": 3.598785161972046, | |
| "learning_rate": 5.32523385055574e-06, | |
| "loss": 0.4429, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 2.3407765922340777, | |
| "grad_norm": 3.603203058242798, | |
| "learning_rate": 5.32083195774183e-06, | |
| "loss": 0.4508, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 2.3429765702342977, | |
| "grad_norm": 2.5761771202087402, | |
| "learning_rate": 5.31643006492792e-06, | |
| "loss": 0.448, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 2.3451765482345177, | |
| "grad_norm": 3.6221818923950195, | |
| "learning_rate": 5.31202817211401e-06, | |
| "loss": 0.4305, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 2.3473765262347377, | |
| "grad_norm": 3.062361717224121, | |
| "learning_rate": 5.3076262793000995e-06, | |
| "loss": 0.45, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 2.3495765042349577, | |
| "grad_norm": 2.6559677124023438, | |
| "learning_rate": 5.30322438648619e-06, | |
| "loss": 0.4569, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 2.3517764822351777, | |
| "grad_norm": 2.8080978393554688, | |
| "learning_rate": 5.29882249367228e-06, | |
| "loss": 0.4376, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 2.3539764602353976, | |
| "grad_norm": 2.880061388015747, | |
| "learning_rate": 5.2944206008583695e-06, | |
| "loss": 0.4435, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 2.3561764382356176, | |
| "grad_norm": 2.902592420578003, | |
| "learning_rate": 5.29001870804446e-06, | |
| "loss": 0.4446, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 2.3583764162358376, | |
| "grad_norm": 2.560408592224121, | |
| "learning_rate": 5.28561681523055e-06, | |
| "loss": 0.4533, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 2.3605763942360576, | |
| "grad_norm": 3.5301778316497803, | |
| "learning_rate": 5.2812149224166395e-06, | |
| "loss": 0.4499, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 2.3627763722362776, | |
| "grad_norm": 3.1170268058776855, | |
| "learning_rate": 5.27681302960273e-06, | |
| "loss": 0.4392, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 2.3649763502364975, | |
| "grad_norm": 2.9975242614746094, | |
| "learning_rate": 5.27241113678882e-06, | |
| "loss": 0.4443, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 2.3671763282367175, | |
| "grad_norm": 2.9318737983703613, | |
| "learning_rate": 5.2680092439749095e-06, | |
| "loss": 0.4382, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 2.3693763062369375, | |
| "grad_norm": 2.7009778022766113, | |
| "learning_rate": 5.263607351161e-06, | |
| "loss": 0.4486, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 2.3715762842371575, | |
| "grad_norm": 3.265301465988159, | |
| "learning_rate": 5.25920545834709e-06, | |
| "loss": 0.4386, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 2.3737762622373775, | |
| "grad_norm": 3.5099949836730957, | |
| "learning_rate": 5.2548035655331795e-06, | |
| "loss": 0.4354, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 2.3759762402375975, | |
| "grad_norm": 2.997199296951294, | |
| "learning_rate": 5.25040167271927e-06, | |
| "loss": 0.4449, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 2.3781762182378174, | |
| "grad_norm": 3.5661022663116455, | |
| "learning_rate": 5.24599977990536e-06, | |
| "loss": 0.4533, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 2.3803761962380374, | |
| "grad_norm": 2.6311588287353516, | |
| "learning_rate": 5.2415978870914495e-06, | |
| "loss": 0.4535, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 2.382576174238258, | |
| "grad_norm": 2.3854992389678955, | |
| "learning_rate": 5.23719599427754e-06, | |
| "loss": 0.441, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 2.3847761522384774, | |
| "grad_norm": 3.3768720626831055, | |
| "learning_rate": 5.23279410146363e-06, | |
| "loss": 0.4467, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 2.386976130238698, | |
| "grad_norm": 3.2119550704956055, | |
| "learning_rate": 5.2283922086497195e-06, | |
| "loss": 0.4608, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 2.389176108238918, | |
| "grad_norm": 3.434720277786255, | |
| "learning_rate": 5.22399031583581e-06, | |
| "loss": 0.4415, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 2.3913760862391378, | |
| "grad_norm": 2.7094149589538574, | |
| "learning_rate": 5.219588423021899e-06, | |
| "loss": 0.4562, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 2.3935760642393578, | |
| "grad_norm": 2.9980342388153076, | |
| "learning_rate": 5.2151865302079895e-06, | |
| "loss": 0.4564, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 2.3957760422395777, | |
| "grad_norm": 3.241988182067871, | |
| "learning_rate": 5.21078463739408e-06, | |
| "loss": 0.4494, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 2.3979760202397977, | |
| "grad_norm": 3.08487606048584, | |
| "learning_rate": 5.206382744580169e-06, | |
| "loss": 0.439, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 2.4001759982400177, | |
| "grad_norm": 3.0313308238983154, | |
| "learning_rate": 5.2019808517662595e-06, | |
| "loss": 0.4412, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 2.4023759762402377, | |
| "grad_norm": 3.303107738494873, | |
| "learning_rate": 5.19757895895235e-06, | |
| "loss": 0.4437, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 2.4045759542404577, | |
| "grad_norm": 2.7623887062072754, | |
| "learning_rate": 5.193177066138439e-06, | |
| "loss": 0.4426, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 2.4067759322406777, | |
| "grad_norm": 3.3932597637176514, | |
| "learning_rate": 5.1887751733245295e-06, | |
| "loss": 0.4381, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 2.4089759102408976, | |
| "grad_norm": 2.618532657623291, | |
| "learning_rate": 5.18437328051062e-06, | |
| "loss": 0.4466, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 2.4111758882411176, | |
| "grad_norm": 2.4478089809417725, | |
| "learning_rate": 5.179971387696709e-06, | |
| "loss": 0.4494, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 2.4133758662413376, | |
| "grad_norm": 3.646751642227173, | |
| "learning_rate": 5.1755694948828e-06, | |
| "loss": 0.4684, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 2.4155758442415576, | |
| "grad_norm": 2.54317569732666, | |
| "learning_rate": 5.171167602068891e-06, | |
| "loss": 0.4467, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 2.4177758222417776, | |
| "grad_norm": 2.939131021499634, | |
| "learning_rate": 5.166765709254981e-06, | |
| "loss": 0.4438, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 2.4199758002419975, | |
| "grad_norm": 3.4897677898406982, | |
| "learning_rate": 5.16236381644107e-06, | |
| "loss": 0.4444, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 2.4221757782422175, | |
| "grad_norm": 3.108614683151245, | |
| "learning_rate": 5.157961923627161e-06, | |
| "loss": 0.4639, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 2.4243757562424375, | |
| "grad_norm": 3.135338068008423, | |
| "learning_rate": 5.153560030813251e-06, | |
| "loss": 0.4424, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 2.4265757342426575, | |
| "grad_norm": 2.249314785003662, | |
| "learning_rate": 5.14915813799934e-06, | |
| "loss": 0.4298, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 2.4287757122428775, | |
| "grad_norm": 2.582010269165039, | |
| "learning_rate": 5.144756245185431e-06, | |
| "loss": 0.4404, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 2.4309756902430975, | |
| "grad_norm": 3.1904852390289307, | |
| "learning_rate": 5.140354352371521e-06, | |
| "loss": 0.4526, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 2.4331756682433174, | |
| "grad_norm": 3.054769277572632, | |
| "learning_rate": 5.13595245955761e-06, | |
| "loss": 0.4332, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 2.4353756462435374, | |
| "grad_norm": 2.8248226642608643, | |
| "learning_rate": 5.131550566743701e-06, | |
| "loss": 0.446, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 2.4375756242437574, | |
| "grad_norm": 3.6720070838928223, | |
| "learning_rate": 5.12714867392979e-06, | |
| "loss": 0.445, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 2.4397756022439774, | |
| "grad_norm": 2.920863389968872, | |
| "learning_rate": 5.12274678111588e-06, | |
| "loss": 0.4482, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 2.441975580244198, | |
| "grad_norm": 2.426818370819092, | |
| "learning_rate": 5.118344888301971e-06, | |
| "loss": 0.452, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 2.4441755582444173, | |
| "grad_norm": 3.390026330947876, | |
| "learning_rate": 5.11394299548806e-06, | |
| "loss": 0.4413, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 2.4463755362446378, | |
| "grad_norm": 3.1187210083007812, | |
| "learning_rate": 5.10954110267415e-06, | |
| "loss": 0.4381, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 2.4485755142448573, | |
| "grad_norm": 3.0595436096191406, | |
| "learning_rate": 5.105139209860241e-06, | |
| "loss": 0.4432, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 2.4507754922450777, | |
| "grad_norm": 2.9489197731018066, | |
| "learning_rate": 5.10073731704633e-06, | |
| "loss": 0.438, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 2.4529754702452977, | |
| "grad_norm": 2.4834353923797607, | |
| "learning_rate": 5.09633542423242e-06, | |
| "loss": 0.4452, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 2.4551754482455177, | |
| "grad_norm": 3.135232925415039, | |
| "learning_rate": 5.091933531418511e-06, | |
| "loss": 0.4493, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 2.4573754262457377, | |
| "grad_norm": 3.158200263977051, | |
| "learning_rate": 5.0875316386046e-06, | |
| "loss": 0.446, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 2.4595754042459577, | |
| "grad_norm": 3.199408531188965, | |
| "learning_rate": 5.08312974579069e-06, | |
| "loss": 0.4457, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 2.4617753822461776, | |
| "grad_norm": 2.692122459411621, | |
| "learning_rate": 5.078727852976781e-06, | |
| "loss": 0.4453, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 2.4639753602463976, | |
| "grad_norm": 2.708963632583618, | |
| "learning_rate": 5.07432596016287e-06, | |
| "loss": 0.4375, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 2.4661753382466176, | |
| "grad_norm": 2.9427683353424072, | |
| "learning_rate": 5.06992406734896e-06, | |
| "loss": 0.4419, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 2.4683753162468376, | |
| "grad_norm": 3.154421329498291, | |
| "learning_rate": 5.065522174535051e-06, | |
| "loss": 0.4539, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 2.4705752942470576, | |
| "grad_norm": 2.364830255508423, | |
| "learning_rate": 5.06112028172114e-06, | |
| "loss": 0.4535, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 2.4727752722472776, | |
| "grad_norm": 3.534681797027588, | |
| "learning_rate": 5.05671838890723e-06, | |
| "loss": 0.4457, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 2.4749752502474975, | |
| "grad_norm": 3.2701926231384277, | |
| "learning_rate": 5.052316496093321e-06, | |
| "loss": 0.447, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 2.4771752282477175, | |
| "grad_norm": 2.395355701446533, | |
| "learning_rate": 5.04791460327941e-06, | |
| "loss": 0.4399, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 2.4793752062479375, | |
| "grad_norm": 3.506509780883789, | |
| "learning_rate": 5.0435127104655e-06, | |
| "loss": 0.4527, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 2.4815751842481575, | |
| "grad_norm": 2.844534397125244, | |
| "learning_rate": 5.03911081765159e-06, | |
| "loss": 0.4436, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 2.4837751622483775, | |
| "grad_norm": 3.3356661796569824, | |
| "learning_rate": 5.03470892483768e-06, | |
| "loss": 0.4432, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 2.4859751402485974, | |
| "grad_norm": 2.6262450218200684, | |
| "learning_rate": 5.03030703202377e-06, | |
| "loss": 0.4508, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 2.4881751182488174, | |
| "grad_norm": 2.852914810180664, | |
| "learning_rate": 5.02590513920986e-06, | |
| "loss": 0.453, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 2.4903750962490374, | |
| "grad_norm": 3.224490165710449, | |
| "learning_rate": 5.021503246395951e-06, | |
| "loss": 0.4637, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 2.4925750742492574, | |
| "grad_norm": 2.180025577545166, | |
| "learning_rate": 5.017101353582041e-06, | |
| "loss": 0.4316, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 2.4947750522494774, | |
| "grad_norm": 2.4598264694213867, | |
| "learning_rate": 5.0126994607681314e-06, | |
| "loss": 0.4381, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 2.4969750302496974, | |
| "grad_norm": 2.587557315826416, | |
| "learning_rate": 5.008297567954221e-06, | |
| "loss": 0.4469, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 2.4991750082499173, | |
| "grad_norm": 2.93766450881958, | |
| "learning_rate": 5.003895675140311e-06, | |
| "loss": 0.459, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 2.5013749862501378, | |
| "grad_norm": 2.926692485809326, | |
| "learning_rate": 4.999493782326401e-06, | |
| "loss": 0.4444, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 2.5035749642503573, | |
| "grad_norm": 3.22385311126709, | |
| "learning_rate": 4.995091889512491e-06, | |
| "loss": 0.4395, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 2.5057749422505777, | |
| "grad_norm": 2.241689682006836, | |
| "learning_rate": 4.99068999669858e-06, | |
| "loss": 0.4461, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 2.5079749202507973, | |
| "grad_norm": 3.1882591247558594, | |
| "learning_rate": 4.986288103884671e-06, | |
| "loss": 0.4482, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 2.5101748982510177, | |
| "grad_norm": 2.0357823371887207, | |
| "learning_rate": 4.981886211070761e-06, | |
| "loss": 0.4291, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 2.5123748762512372, | |
| "grad_norm": 3.0307114124298096, | |
| "learning_rate": 4.977484318256851e-06, | |
| "loss": 0.4556, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 2.5145748542514577, | |
| "grad_norm": 3.274482488632202, | |
| "learning_rate": 4.9730824254429414e-06, | |
| "loss": 0.4561, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 2.5167748322516776, | |
| "grad_norm": 2.6364364624023438, | |
| "learning_rate": 4.968680532629031e-06, | |
| "loss": 0.4462, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 2.5189748102518976, | |
| "grad_norm": 3.74102520942688, | |
| "learning_rate": 4.964278639815121e-06, | |
| "loss": 0.4343, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 2.5211747882521176, | |
| "grad_norm": 2.6400420665740967, | |
| "learning_rate": 4.9598767470012114e-06, | |
| "loss": 0.431, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 2.5233747662523376, | |
| "grad_norm": 3.313936948776245, | |
| "learning_rate": 4.955474854187301e-06, | |
| "loss": 0.4361, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 2.5255747442525576, | |
| "grad_norm": 3.1733415126800537, | |
| "learning_rate": 4.951072961373391e-06, | |
| "loss": 0.4346, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 2.5277747222527776, | |
| "grad_norm": 3.2925596237182617, | |
| "learning_rate": 4.946671068559481e-06, | |
| "loss": 0.4382, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 2.5299747002529975, | |
| "grad_norm": 3.0472724437713623, | |
| "learning_rate": 4.942269175745571e-06, | |
| "loss": 0.4294, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 2.5321746782532175, | |
| "grad_norm": 2.684380054473877, | |
| "learning_rate": 4.937867282931661e-06, | |
| "loss": 0.446, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 2.5343746562534375, | |
| "grad_norm": 2.86007022857666, | |
| "learning_rate": 4.933465390117751e-06, | |
| "loss": 0.4445, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 2.5365746342536575, | |
| "grad_norm": 3.1201276779174805, | |
| "learning_rate": 4.929063497303841e-06, | |
| "loss": 0.4363, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 2.5387746122538775, | |
| "grad_norm": 2.558084726333618, | |
| "learning_rate": 4.924661604489931e-06, | |
| "loss": 0.4272, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 2.5409745902540974, | |
| "grad_norm": 3.3204970359802246, | |
| "learning_rate": 4.920259711676021e-06, | |
| "loss": 0.425, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 2.5431745682543174, | |
| "grad_norm": 2.824089288711548, | |
| "learning_rate": 4.915857818862111e-06, | |
| "loss": 0.4428, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 2.5453745462545374, | |
| "grad_norm": 3.430072784423828, | |
| "learning_rate": 4.911455926048201e-06, | |
| "loss": 0.435, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 2.5475745242547574, | |
| "grad_norm": 3.415888547897339, | |
| "learning_rate": 4.9070540332342914e-06, | |
| "loss": 0.4546, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 2.5497745022549774, | |
| "grad_norm": 3.0503039360046387, | |
| "learning_rate": 4.902652140420382e-06, | |
| "loss": 0.4359, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 2.5519744802551974, | |
| "grad_norm": 3.0880868434906006, | |
| "learning_rate": 4.898250247606471e-06, | |
| "loss": 0.445, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 2.5541744582554173, | |
| "grad_norm": 3.1325924396514893, | |
| "learning_rate": 4.8938483547925614e-06, | |
| "loss": 0.4339, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 2.5563744362556373, | |
| "grad_norm": 3.6278367042541504, | |
| "learning_rate": 4.889446461978652e-06, | |
| "loss": 0.4573, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 2.5585744142558573, | |
| "grad_norm": 2.8526251316070557, | |
| "learning_rate": 4.885044569164741e-06, | |
| "loss": 0.4359, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 2.5607743922560773, | |
| "grad_norm": 2.335749864578247, | |
| "learning_rate": 4.8806426763508314e-06, | |
| "loss": 0.4336, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 2.5629743702562973, | |
| "grad_norm": 2.912464141845703, | |
| "learning_rate": 4.876240783536922e-06, | |
| "loss": 0.4463, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 2.5651743482565177, | |
| "grad_norm": 3.1221654415130615, | |
| "learning_rate": 4.871838890723011e-06, | |
| "loss": 0.4394, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 2.5673743262567372, | |
| "grad_norm": 3.0189766883850098, | |
| "learning_rate": 4.8674369979091014e-06, | |
| "loss": 0.4279, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 2.5695743042569577, | |
| "grad_norm": 2.7130327224731445, | |
| "learning_rate": 4.863035105095191e-06, | |
| "loss": 0.4397, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 2.571774282257177, | |
| "grad_norm": 3.0579302310943604, | |
| "learning_rate": 4.858633212281281e-06, | |
| "loss": 0.4509, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 2.5739742602573976, | |
| "grad_norm": 2.568333625793457, | |
| "learning_rate": 4.8542313194673714e-06, | |
| "loss": 0.4391, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 2.576174238257617, | |
| "grad_norm": 3.574082612991333, | |
| "learning_rate": 4.849829426653461e-06, | |
| "loss": 0.4354, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 2.5783742162578376, | |
| "grad_norm": 2.516016721725464, | |
| "learning_rate": 4.845427533839551e-06, | |
| "loss": 0.4417, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 2.5805741942580576, | |
| "grad_norm": 2.464613199234009, | |
| "learning_rate": 4.8410256410256414e-06, | |
| "loss": 0.4464, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 2.5827741722582775, | |
| "grad_norm": 2.841379404067993, | |
| "learning_rate": 4.836623748211731e-06, | |
| "loss": 0.45, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 2.5849741502584975, | |
| "grad_norm": 2.6032309532165527, | |
| "learning_rate": 4.832221855397821e-06, | |
| "loss": 0.4258, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 2.5871741282587175, | |
| "grad_norm": 2.964139938354492, | |
| "learning_rate": 4.8278199625839114e-06, | |
| "loss": 0.4495, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 2.5893741062589375, | |
| "grad_norm": 4.020530700683594, | |
| "learning_rate": 4.823418069770002e-06, | |
| "loss": 0.435, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 2.5915740842591575, | |
| "grad_norm": 2.493910312652588, | |
| "learning_rate": 4.819016176956092e-06, | |
| "loss": 0.4371, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 2.5937740622593775, | |
| "grad_norm": 3.542248249053955, | |
| "learning_rate": 4.8146142841421814e-06, | |
| "loss": 0.4489, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 2.5959740402595974, | |
| "grad_norm": 3.5736639499664307, | |
| "learning_rate": 4.810212391328272e-06, | |
| "loss": 0.43, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 2.5981740182598174, | |
| "grad_norm": 2.6904780864715576, | |
| "learning_rate": 4.805810498514362e-06, | |
| "loss": 0.4326, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 2.6003739962600374, | |
| "grad_norm": 3.0727078914642334, | |
| "learning_rate": 4.8014086057004514e-06, | |
| "loss": 0.4338, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 2.6025739742602574, | |
| "grad_norm": 3.355623722076416, | |
| "learning_rate": 4.797006712886542e-06, | |
| "loss": 0.4489, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 2.6047739522604774, | |
| "grad_norm": 2.657305955886841, | |
| "learning_rate": 4.792604820072632e-06, | |
| "loss": 0.4416, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 2.6069739302606973, | |
| "grad_norm": 2.6770079135894775, | |
| "learning_rate": 4.7882029272587214e-06, | |
| "loss": 0.4317, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 2.6091739082609173, | |
| "grad_norm": 3.4358301162719727, | |
| "learning_rate": 4.783801034444812e-06, | |
| "loss": 0.4307, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 2.6113738862611373, | |
| "grad_norm": 2.719377040863037, | |
| "learning_rate": 4.779399141630901e-06, | |
| "loss": 0.4452, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 2.6135738642613573, | |
| "grad_norm": 3.2705419063568115, | |
| "learning_rate": 4.7749972488169914e-06, | |
| "loss": 0.4391, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 2.6157738422615773, | |
| "grad_norm": 2.5769264698028564, | |
| "learning_rate": 4.770595356003082e-06, | |
| "loss": 0.4566, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 2.6179738202617973, | |
| "grad_norm": 3.0442352294921875, | |
| "learning_rate": 4.766193463189171e-06, | |
| "loss": 0.4394, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 2.6201737982620172, | |
| "grad_norm": 2.913459539413452, | |
| "learning_rate": 4.7617915703752614e-06, | |
| "loss": 0.4445, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 2.622373776262237, | |
| "grad_norm": 2.6879146099090576, | |
| "learning_rate": 4.757389677561352e-06, | |
| "loss": 0.4433, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 2.6245737542624576, | |
| "grad_norm": 3.6051576137542725, | |
| "learning_rate": 4.752987784747442e-06, | |
| "loss": 0.4385, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 2.626773732262677, | |
| "grad_norm": 2.6867752075195312, | |
| "learning_rate": 4.748585891933532e-06, | |
| "loss": 0.4569, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 2.6289737102628976, | |
| "grad_norm": 2.795522928237915, | |
| "learning_rate": 4.744183999119622e-06, | |
| "loss": 0.4413, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 2.631173688263117, | |
| "grad_norm": 3.2469236850738525, | |
| "learning_rate": 4.739782106305712e-06, | |
| "loss": 0.4539, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 2.6333736662633376, | |
| "grad_norm": 3.3304011821746826, | |
| "learning_rate": 4.735380213491802e-06, | |
| "loss": 0.46, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 2.635573644263557, | |
| "grad_norm": 3.8114166259765625, | |
| "learning_rate": 4.730978320677892e-06, | |
| "loss": 0.4472, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 2.6377736222637775, | |
| "grad_norm": 3.1319470405578613, | |
| "learning_rate": 4.726576427863982e-06, | |
| "loss": 0.4381, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 2.6399736002639975, | |
| "grad_norm": 3.4921023845672607, | |
| "learning_rate": 4.722174535050072e-06, | |
| "loss": 0.4397, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 2.6421735782642175, | |
| "grad_norm": 2.191277265548706, | |
| "learning_rate": 4.717772642236162e-06, | |
| "loss": 0.4329, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 2.6443735562644375, | |
| "grad_norm": 3.3526830673217773, | |
| "learning_rate": 4.713370749422252e-06, | |
| "loss": 0.4425, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 2.6465735342646575, | |
| "grad_norm": 3.1728663444519043, | |
| "learning_rate": 4.708968856608342e-06, | |
| "loss": 0.4304, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 2.6487735122648774, | |
| "grad_norm": 2.8686399459838867, | |
| "learning_rate": 4.704566963794432e-06, | |
| "loss": 0.4414, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 2.6509734902650974, | |
| "grad_norm": 3.0160744190216064, | |
| "learning_rate": 4.700165070980522e-06, | |
| "loss": 0.4318, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 2.6531734682653174, | |
| "grad_norm": 3.1512398719787598, | |
| "learning_rate": 4.695763178166612e-06, | |
| "loss": 0.4469, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 2.6553734462655374, | |
| "grad_norm": 3.48527193069458, | |
| "learning_rate": 4.691361285352702e-06, | |
| "loss": 0.4239, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 2.6575734242657574, | |
| "grad_norm": 3.1018311977386475, | |
| "learning_rate": 4.686959392538792e-06, | |
| "loss": 0.4555, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 2.6597734022659774, | |
| "grad_norm": 2.867196559906006, | |
| "learning_rate": 4.6825574997248814e-06, | |
| "loss": 0.4392, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 2.6619733802661973, | |
| "grad_norm": 3.6857316493988037, | |
| "learning_rate": 4.6781556069109726e-06, | |
| "loss": 0.4384, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 2.6641733582664173, | |
| "grad_norm": 3.517019748687744, | |
| "learning_rate": 4.673753714097062e-06, | |
| "loss": 0.4323, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 2.6663733362666373, | |
| "grad_norm": 2.7562782764434814, | |
| "learning_rate": 4.669351821283152e-06, | |
| "loss": 0.4425, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 2.6685733142668573, | |
| "grad_norm": 3.2904388904571533, | |
| "learning_rate": 4.6649499284692426e-06, | |
| "loss": 0.4365, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 2.6707732922670773, | |
| "grad_norm": 2.5950496196746826, | |
| "learning_rate": 4.660548035655332e-06, | |
| "loss": 0.4451, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 2.6729732702672973, | |
| "grad_norm": 2.883255958557129, | |
| "learning_rate": 4.656146142841422e-06, | |
| "loss": 0.4327, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 2.6751732482675172, | |
| "grad_norm": 2.8640213012695312, | |
| "learning_rate": 4.6517442500275126e-06, | |
| "loss": 0.4507, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 2.677373226267737, | |
| "grad_norm": 2.547304630279541, | |
| "learning_rate": 4.647342357213602e-06, | |
| "loss": 0.4391, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 2.679573204267957, | |
| "grad_norm": 2.599860668182373, | |
| "learning_rate": 4.642940464399692e-06, | |
| "loss": 0.4351, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 2.681773182268177, | |
| "grad_norm": 2.898108959197998, | |
| "learning_rate": 4.6385385715857826e-06, | |
| "loss": 0.4486, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 2.683973160268397, | |
| "grad_norm": 3.2468693256378174, | |
| "learning_rate": 4.634136678771872e-06, | |
| "loss": 0.4465, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 2.686173138268617, | |
| "grad_norm": 2.5715901851654053, | |
| "learning_rate": 4.629734785957962e-06, | |
| "loss": 0.4464, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 2.6883731162688376, | |
| "grad_norm": 3.2512638568878174, | |
| "learning_rate": 4.6253328931440526e-06, | |
| "loss": 0.436, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 2.690573094269057, | |
| "grad_norm": 3.1215593814849854, | |
| "learning_rate": 4.620931000330142e-06, | |
| "loss": 0.4333, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 2.6927730722692775, | |
| "grad_norm": 3.264613151550293, | |
| "learning_rate": 4.616529107516232e-06, | |
| "loss": 0.4288, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 2.694973050269497, | |
| "grad_norm": 3.0146634578704834, | |
| "learning_rate": 4.6121272147023226e-06, | |
| "loss": 0.4436, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 2.6971730282697175, | |
| "grad_norm": 3.1037158966064453, | |
| "learning_rate": 4.607725321888412e-06, | |
| "loss": 0.4545, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 2.699373006269937, | |
| "grad_norm": 3.4465529918670654, | |
| "learning_rate": 4.603323429074502e-06, | |
| "loss": 0.421, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 2.7015729842701575, | |
| "grad_norm": 3.160284996032715, | |
| "learning_rate": 4.5989215362605926e-06, | |
| "loss": 0.4323, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 2.7037729622703774, | |
| "grad_norm": 2.4882216453552246, | |
| "learning_rate": 4.594519643446683e-06, | |
| "loss": 0.4477, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 2.7059729402705974, | |
| "grad_norm": 2.794928550720215, | |
| "learning_rate": 4.590117750632772e-06, | |
| "loss": 0.4365, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 2.7081729182708174, | |
| "grad_norm": 3.3920443058013916, | |
| "learning_rate": 4.5857158578188626e-06, | |
| "loss": 0.4419, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 2.7103728962710374, | |
| "grad_norm": 3.186394214630127, | |
| "learning_rate": 4.581313965004953e-06, | |
| "loss": 0.4226, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 2.7125728742712574, | |
| "grad_norm": 3.5185060501098633, | |
| "learning_rate": 4.576912072191042e-06, | |
| "loss": 0.4487, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 2.7147728522714774, | |
| "grad_norm": 2.8069283962249756, | |
| "learning_rate": 4.5725101793771326e-06, | |
| "loss": 0.443, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 2.7169728302716973, | |
| "grad_norm": 4.11994743347168, | |
| "learning_rate": 4.568108286563223e-06, | |
| "loss": 0.4387, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 2.7191728082719173, | |
| "grad_norm": 3.6650454998016357, | |
| "learning_rate": 4.563706393749312e-06, | |
| "loss": 0.4431, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 2.7213727862721373, | |
| "grad_norm": 2.7214787006378174, | |
| "learning_rate": 4.5593045009354026e-06, | |
| "loss": 0.4299, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 2.7235727642723573, | |
| "grad_norm": 3.1517221927642822, | |
| "learning_rate": 4.554902608121493e-06, | |
| "loss": 0.4417, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 2.7257727422725773, | |
| "grad_norm": 2.9995832443237305, | |
| "learning_rate": 4.550500715307582e-06, | |
| "loss": 0.4357, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 2.7279727202727972, | |
| "grad_norm": 3.1918044090270996, | |
| "learning_rate": 4.5460988224936726e-06, | |
| "loss": 0.442, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 2.7301726982730172, | |
| "grad_norm": 3.101876735687256, | |
| "learning_rate": 4.541696929679763e-06, | |
| "loss": 0.4333, | |
| "step": 124100 | |
| }, | |
| { | |
| "epoch": 2.732372676273237, | |
| "grad_norm": 3.4324588775634766, | |
| "learning_rate": 4.537295036865852e-06, | |
| "loss": 0.431, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 2.734572654273457, | |
| "grad_norm": 3.6786587238311768, | |
| "learning_rate": 4.5328931440519426e-06, | |
| "loss": 0.4499, | |
| "step": 124300 | |
| }, | |
| { | |
| "epoch": 2.736772632273677, | |
| "grad_norm": 2.661198139190674, | |
| "learning_rate": 4.528491251238033e-06, | |
| "loss": 0.4446, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 2.738972610273897, | |
| "grad_norm": 2.958374500274658, | |
| "learning_rate": 4.524089358424123e-06, | |
| "loss": 0.4364, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 2.741172588274117, | |
| "grad_norm": 3.012861967086792, | |
| "learning_rate": 4.519687465610213e-06, | |
| "loss": 0.432, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 2.743372566274337, | |
| "grad_norm": 2.911194324493408, | |
| "learning_rate": 4.515285572796303e-06, | |
| "loss": 0.4524, | |
| "step": 124700 | |
| }, | |
| { | |
| "epoch": 2.745572544274557, | |
| "grad_norm": 2.891263961791992, | |
| "learning_rate": 4.510883679982393e-06, | |
| "loss": 0.435, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 2.7477725222747775, | |
| "grad_norm": 3.8306422233581543, | |
| "learning_rate": 4.5064817871684826e-06, | |
| "loss": 0.4331, | |
| "step": 124900 | |
| }, | |
| { | |
| "epoch": 2.749972500274997, | |
| "grad_norm": 3.3201122283935547, | |
| "learning_rate": 4.502079894354573e-06, | |
| "loss": 0.4483, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 2.7521724782752175, | |
| "grad_norm": 3.1668500900268555, | |
| "learning_rate": 4.497678001540663e-06, | |
| "loss": 0.4525, | |
| "step": 125100 | |
| }, | |
| { | |
| "epoch": 2.754372456275437, | |
| "grad_norm": 2.9911599159240723, | |
| "learning_rate": 4.4932761087267525e-06, | |
| "loss": 0.426, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 2.7565724342756575, | |
| "grad_norm": 2.79228138923645, | |
| "learning_rate": 4.488874215912843e-06, | |
| "loss": 0.4494, | |
| "step": 125300 | |
| }, | |
| { | |
| "epoch": 2.758772412275877, | |
| "grad_norm": 3.6654934883117676, | |
| "learning_rate": 4.484472323098933e-06, | |
| "loss": 0.4246, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 2.7609723902760974, | |
| "grad_norm": 3.1065101623535156, | |
| "learning_rate": 4.4800704302850225e-06, | |
| "loss": 0.4419, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 2.7631723682763174, | |
| "grad_norm": 3.4590702056884766, | |
| "learning_rate": 4.475668537471113e-06, | |
| "loss": 0.4429, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 2.7653723462765374, | |
| "grad_norm": 2.956099033355713, | |
| "learning_rate": 4.471266644657203e-06, | |
| "loss": 0.444, | |
| "step": 125700 | |
| }, | |
| { | |
| "epoch": 2.7675723242767574, | |
| "grad_norm": 3.131133556365967, | |
| "learning_rate": 4.4668647518432925e-06, | |
| "loss": 0.436, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 2.7697723022769773, | |
| "grad_norm": 2.866058349609375, | |
| "learning_rate": 4.462462859029383e-06, | |
| "loss": 0.4308, | |
| "step": 125900 | |
| }, | |
| { | |
| "epoch": 2.7719722802771973, | |
| "grad_norm": 2.9503538608551025, | |
| "learning_rate": 4.458060966215473e-06, | |
| "loss": 0.4339, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 2.7741722582774173, | |
| "grad_norm": 2.9972116947174072, | |
| "learning_rate": 4.4536590734015625e-06, | |
| "loss": 0.4457, | |
| "step": 126100 | |
| }, | |
| { | |
| "epoch": 2.7763722362776373, | |
| "grad_norm": 2.7631094455718994, | |
| "learning_rate": 4.449257180587654e-06, | |
| "loss": 0.4387, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 2.7785722142778573, | |
| "grad_norm": 2.7902297973632812, | |
| "learning_rate": 4.444855287773743e-06, | |
| "loss": 0.44, | |
| "step": 126300 | |
| }, | |
| { | |
| "epoch": 2.7807721922780773, | |
| "grad_norm": 2.688309907913208, | |
| "learning_rate": 4.440453394959833e-06, | |
| "loss": 0.4432, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 2.7829721702782972, | |
| "grad_norm": 2.6654300689697266, | |
| "learning_rate": 4.436051502145924e-06, | |
| "loss": 0.4347, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 2.7851721482785172, | |
| "grad_norm": 3.6261539459228516, | |
| "learning_rate": 4.431649609332013e-06, | |
| "loss": 0.4267, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 2.787372126278737, | |
| "grad_norm": 3.265857696533203, | |
| "learning_rate": 4.427247716518103e-06, | |
| "loss": 0.4374, | |
| "step": 126700 | |
| }, | |
| { | |
| "epoch": 2.789572104278957, | |
| "grad_norm": 3.311096668243408, | |
| "learning_rate": 4.422845823704193e-06, | |
| "loss": 0.4432, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 2.791772082279177, | |
| "grad_norm": 3.3290562629699707, | |
| "learning_rate": 4.418443930890283e-06, | |
| "loss": 0.4326, | |
| "step": 126900 | |
| }, | |
| { | |
| "epoch": 2.793972060279397, | |
| "grad_norm": 3.593282461166382, | |
| "learning_rate": 4.414042038076373e-06, | |
| "loss": 0.4422, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 2.796172038279617, | |
| "grad_norm": 2.8509931564331055, | |
| "learning_rate": 4.409640145262463e-06, | |
| "loss": 0.4443, | |
| "step": 127100 | |
| }, | |
| { | |
| "epoch": 2.798372016279837, | |
| "grad_norm": 2.7307536602020264, | |
| "learning_rate": 4.405238252448553e-06, | |
| "loss": 0.4362, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 2.800571994280057, | |
| "grad_norm": 3.677067518234253, | |
| "learning_rate": 4.400836359634643e-06, | |
| "loss": 0.4391, | |
| "step": 127300 | |
| }, | |
| { | |
| "epoch": 2.802771972280277, | |
| "grad_norm": 3.1093156337738037, | |
| "learning_rate": 4.396434466820733e-06, | |
| "loss": 0.4299, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 2.804971950280497, | |
| "grad_norm": 3.1432149410247803, | |
| "learning_rate": 4.392032574006823e-06, | |
| "loss": 0.4454, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 2.807171928280717, | |
| "grad_norm": 3.175234079360962, | |
| "learning_rate": 4.387630681192913e-06, | |
| "loss": 0.4361, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 2.809371906280937, | |
| "grad_norm": 3.7147324085235596, | |
| "learning_rate": 4.383228788379003e-06, | |
| "loss": 0.434, | |
| "step": 127700 | |
| }, | |
| { | |
| "epoch": 2.8115718842811575, | |
| "grad_norm": 3.4218947887420654, | |
| "learning_rate": 4.378826895565093e-06, | |
| "loss": 0.4392, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 2.813771862281377, | |
| "grad_norm": 4.2883195877075195, | |
| "learning_rate": 4.374425002751183e-06, | |
| "loss": 0.4254, | |
| "step": 127900 | |
| }, | |
| { | |
| "epoch": 2.8159718402815974, | |
| "grad_norm": 2.9192450046539307, | |
| "learning_rate": 4.370023109937274e-06, | |
| "loss": 0.4596, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 2.818171818281817, | |
| "grad_norm": 3.3479373455047607, | |
| "learning_rate": 4.365621217123364e-06, | |
| "loss": 0.4299, | |
| "step": 128100 | |
| }, | |
| { | |
| "epoch": 2.8203717962820374, | |
| "grad_norm": 3.0389039516448975, | |
| "learning_rate": 4.361219324309453e-06, | |
| "loss": 0.4348, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 2.822571774282257, | |
| "grad_norm": 3.236820697784424, | |
| "learning_rate": 4.356817431495544e-06, | |
| "loss": 0.4244, | |
| "step": 128300 | |
| }, | |
| { | |
| "epoch": 2.8247717522824773, | |
| "grad_norm": 3.319667339324951, | |
| "learning_rate": 4.352415538681634e-06, | |
| "loss": 0.4316, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 2.8269717302826973, | |
| "grad_norm": 3.5438737869262695, | |
| "learning_rate": 4.348013645867723e-06, | |
| "loss": 0.4427, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 2.8291717082829173, | |
| "grad_norm": 2.995439052581787, | |
| "learning_rate": 4.343611753053814e-06, | |
| "loss": 0.4402, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 2.8313716862831373, | |
| "grad_norm": 2.894104480743408, | |
| "learning_rate": 4.339209860239904e-06, | |
| "loss": 0.4351, | |
| "step": 128700 | |
| }, | |
| { | |
| "epoch": 2.8335716642833573, | |
| "grad_norm": 3.5675222873687744, | |
| "learning_rate": 4.334807967425993e-06, | |
| "loss": 0.4359, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 2.8357716422835773, | |
| "grad_norm": 2.749405860900879, | |
| "learning_rate": 4.330406074612084e-06, | |
| "loss": 0.4353, | |
| "step": 128900 | |
| }, | |
| { | |
| "epoch": 2.8379716202837972, | |
| "grad_norm": 3.581648826599121, | |
| "learning_rate": 4.326004181798173e-06, | |
| "loss": 0.4465, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 2.840171598284017, | |
| "grad_norm": 2.5396058559417725, | |
| "learning_rate": 4.321602288984263e-06, | |
| "loss": 0.4505, | |
| "step": 129100 | |
| }, | |
| { | |
| "epoch": 2.842371576284237, | |
| "grad_norm": 3.2663464546203613, | |
| "learning_rate": 4.317200396170354e-06, | |
| "loss": 0.4315, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 2.844571554284457, | |
| "grad_norm": 3.687699556350708, | |
| "learning_rate": 4.312798503356443e-06, | |
| "loss": 0.4341, | |
| "step": 129300 | |
| }, | |
| { | |
| "epoch": 2.846771532284677, | |
| "grad_norm": 3.822061538696289, | |
| "learning_rate": 4.308396610542533e-06, | |
| "loss": 0.4204, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 2.848971510284897, | |
| "grad_norm": 4.063410758972168, | |
| "learning_rate": 4.303994717728624e-06, | |
| "loss": 0.4281, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 2.851171488285117, | |
| "grad_norm": 3.304727554321289, | |
| "learning_rate": 4.299592824914713e-06, | |
| "loss": 0.4276, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 2.853371466285337, | |
| "grad_norm": 3.195687770843506, | |
| "learning_rate": 4.295190932100804e-06, | |
| "loss": 0.4195, | |
| "step": 129700 | |
| }, | |
| { | |
| "epoch": 2.855571444285557, | |
| "grad_norm": 3.390817403793335, | |
| "learning_rate": 4.290789039286894e-06, | |
| "loss": 0.4442, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 2.857771422285777, | |
| "grad_norm": 3.095522403717041, | |
| "learning_rate": 4.286387146472984e-06, | |
| "loss": 0.4459, | |
| "step": 129900 | |
| }, | |
| { | |
| "epoch": 2.859971400285997, | |
| "grad_norm": 2.7765722274780273, | |
| "learning_rate": 4.281985253659074e-06, | |
| "loss": 0.4347, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 2.862171378286217, | |
| "grad_norm": 3.3501715660095215, | |
| "learning_rate": 4.277583360845164e-06, | |
| "loss": 0.4315, | |
| "step": 130100 | |
| }, | |
| { | |
| "epoch": 2.864371356286437, | |
| "grad_norm": 2.8992860317230225, | |
| "learning_rate": 4.273181468031254e-06, | |
| "loss": 0.4196, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 2.866571334286657, | |
| "grad_norm": 3.240837335586548, | |
| "learning_rate": 4.268779575217344e-06, | |
| "loss": 0.4253, | |
| "step": 130300 | |
| }, | |
| { | |
| "epoch": 2.868771312286877, | |
| "grad_norm": 2.687161445617676, | |
| "learning_rate": 4.264377682403434e-06, | |
| "loss": 0.4297, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 2.870971290287097, | |
| "grad_norm": 3.1937789916992188, | |
| "learning_rate": 4.259975789589524e-06, | |
| "loss": 0.4367, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 2.873171268287317, | |
| "grad_norm": 2.9205288887023926, | |
| "learning_rate": 4.255573896775614e-06, | |
| "loss": 0.434, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 2.8753712462875374, | |
| "grad_norm": 2.830369710922241, | |
| "learning_rate": 4.251172003961704e-06, | |
| "loss": 0.4487, | |
| "step": 130700 | |
| }, | |
| { | |
| "epoch": 2.877571224287757, | |
| "grad_norm": 3.458214044570923, | |
| "learning_rate": 4.246770111147794e-06, | |
| "loss": 0.4326, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 2.8797712022879773, | |
| "grad_norm": 3.2541399002075195, | |
| "learning_rate": 4.242368218333883e-06, | |
| "loss": 0.4541, | |
| "step": 130900 | |
| }, | |
| { | |
| "epoch": 2.881971180288197, | |
| "grad_norm": 3.3345773220062256, | |
| "learning_rate": 4.237966325519974e-06, | |
| "loss": 0.434, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 2.8841711582884173, | |
| "grad_norm": 3.172386646270752, | |
| "learning_rate": 4.233564432706064e-06, | |
| "loss": 0.4399, | |
| "step": 131100 | |
| }, | |
| { | |
| "epoch": 2.886371136288637, | |
| "grad_norm": 2.489182472229004, | |
| "learning_rate": 4.229162539892153e-06, | |
| "loss": 0.4324, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 2.8885711142888573, | |
| "grad_norm": 3.0109496116638184, | |
| "learning_rate": 4.224760647078244e-06, | |
| "loss": 0.4342, | |
| "step": 131300 | |
| }, | |
| { | |
| "epoch": 2.8907710922890772, | |
| "grad_norm": 3.0989527702331543, | |
| "learning_rate": 4.220358754264334e-06, | |
| "loss": 0.4317, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 2.8929710702892972, | |
| "grad_norm": 4.138854026794434, | |
| "learning_rate": 4.215956861450424e-06, | |
| "loss": 0.4243, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 2.895171048289517, | |
| "grad_norm": 2.925975799560547, | |
| "learning_rate": 4.2115549686365145e-06, | |
| "loss": 0.4307, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 2.897371026289737, | |
| "grad_norm": 2.7254014015197754, | |
| "learning_rate": 4.207153075822604e-06, | |
| "loss": 0.4311, | |
| "step": 131700 | |
| }, | |
| { | |
| "epoch": 2.899571004289957, | |
| "grad_norm": 3.5956203937530518, | |
| "learning_rate": 4.202751183008694e-06, | |
| "loss": 0.43, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 2.901770982290177, | |
| "grad_norm": 3.406620502471924, | |
| "learning_rate": 4.1983492901947845e-06, | |
| "loss": 0.4321, | |
| "step": 131900 | |
| }, | |
| { | |
| "epoch": 2.903970960290397, | |
| "grad_norm": 3.0268537998199463, | |
| "learning_rate": 4.193947397380874e-06, | |
| "loss": 0.4328, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 2.906170938290617, | |
| "grad_norm": 3.0812931060791016, | |
| "learning_rate": 4.189545504566964e-06, | |
| "loss": 0.4443, | |
| "step": 132100 | |
| }, | |
| { | |
| "epoch": 2.908370916290837, | |
| "grad_norm": 2.5374112129211426, | |
| "learning_rate": 4.1851436117530545e-06, | |
| "loss": 0.4324, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 2.910570894291057, | |
| "grad_norm": 2.906034231185913, | |
| "learning_rate": 4.180741718939144e-06, | |
| "loss": 0.4358, | |
| "step": 132300 | |
| }, | |
| { | |
| "epoch": 2.912770872291277, | |
| "grad_norm": 3.593029499053955, | |
| "learning_rate": 4.176339826125234e-06, | |
| "loss": 0.4517, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 2.914970850291497, | |
| "grad_norm": 2.914520025253296, | |
| "learning_rate": 4.1719379333113245e-06, | |
| "loss": 0.4485, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 2.917170828291717, | |
| "grad_norm": 2.874202013015747, | |
| "learning_rate": 4.167536040497414e-06, | |
| "loss": 0.4506, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 2.919370806291937, | |
| "grad_norm": 3.172924041748047, | |
| "learning_rate": 4.163134147683504e-06, | |
| "loss": 0.4416, | |
| "step": 132700 | |
| }, | |
| { | |
| "epoch": 2.921570784292157, | |
| "grad_norm": 4.034905433654785, | |
| "learning_rate": 4.1587322548695945e-06, | |
| "loss": 0.4297, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 2.923770762292377, | |
| "grad_norm": 2.940948963165283, | |
| "learning_rate": 4.154330362055684e-06, | |
| "loss": 0.4277, | |
| "step": 132900 | |
| }, | |
| { | |
| "epoch": 2.925970740292597, | |
| "grad_norm": 4.134010314941406, | |
| "learning_rate": 4.149928469241774e-06, | |
| "loss": 0.4206, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 2.928170718292817, | |
| "grad_norm": 3.641511917114258, | |
| "learning_rate": 4.145526576427864e-06, | |
| "loss": 0.4433, | |
| "step": 133100 | |
| }, | |
| { | |
| "epoch": 2.930370696293037, | |
| "grad_norm": 3.1284308433532715, | |
| "learning_rate": 4.141124683613955e-06, | |
| "loss": 0.42, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 2.932570674293257, | |
| "grad_norm": 3.499300956726074, | |
| "learning_rate": 4.136722790800044e-06, | |
| "loss": 0.4429, | |
| "step": 133300 | |
| }, | |
| { | |
| "epoch": 2.9347706522934773, | |
| "grad_norm": 3.7782890796661377, | |
| "learning_rate": 4.1323208979861345e-06, | |
| "loss": 0.4207, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 2.936970630293697, | |
| "grad_norm": 2.8001630306243896, | |
| "learning_rate": 4.127919005172225e-06, | |
| "loss": 0.423, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 2.9391706082939173, | |
| "grad_norm": 3.0386412143707275, | |
| "learning_rate": 4.123517112358314e-06, | |
| "loss": 0.4353, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 2.941370586294137, | |
| "grad_norm": 3.480564594268799, | |
| "learning_rate": 4.1191152195444045e-06, | |
| "loss": 0.4373, | |
| "step": 133700 | |
| }, | |
| { | |
| "epoch": 2.9435705642943573, | |
| "grad_norm": 3.148545503616333, | |
| "learning_rate": 4.114713326730495e-06, | |
| "loss": 0.4359, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 2.945770542294577, | |
| "grad_norm": 2.8668603897094727, | |
| "learning_rate": 4.110311433916584e-06, | |
| "loss": 0.4435, | |
| "step": 133900 | |
| }, | |
| { | |
| "epoch": 2.9479705202947972, | |
| "grad_norm": 3.410372495651245, | |
| "learning_rate": 4.1059095411026745e-06, | |
| "loss": 0.4515, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 2.950170498295017, | |
| "grad_norm": 3.0960798263549805, | |
| "learning_rate": 4.101507648288765e-06, | |
| "loss": 0.4359, | |
| "step": 134100 | |
| }, | |
| { | |
| "epoch": 2.952370476295237, | |
| "grad_norm": 2.3949267864227295, | |
| "learning_rate": 4.097105755474854e-06, | |
| "loss": 0.4283, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 2.954570454295457, | |
| "grad_norm": 3.325115442276001, | |
| "learning_rate": 4.0927038626609445e-06, | |
| "loss": 0.4281, | |
| "step": 134300 | |
| }, | |
| { | |
| "epoch": 2.956770432295677, | |
| "grad_norm": 3.046936511993408, | |
| "learning_rate": 4.088301969847035e-06, | |
| "loss": 0.4431, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 2.958970410295897, | |
| "grad_norm": 3.0470268726348877, | |
| "learning_rate": 4.083900077033124e-06, | |
| "loss": 0.4235, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 2.961170388296117, | |
| "grad_norm": 2.8730931282043457, | |
| "learning_rate": 4.0794981842192145e-06, | |
| "loss": 0.4248, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 2.963370366296337, | |
| "grad_norm": 2.930630922317505, | |
| "learning_rate": 4.075096291405305e-06, | |
| "loss": 0.4398, | |
| "step": 134700 | |
| }, | |
| { | |
| "epoch": 2.965570344296557, | |
| "grad_norm": 3.920790672302246, | |
| "learning_rate": 4.070694398591394e-06, | |
| "loss": 0.4423, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 2.967770322296777, | |
| "grad_norm": 3.2532870769500732, | |
| "learning_rate": 4.0662925057774845e-06, | |
| "loss": 0.424, | |
| "step": 134900 | |
| }, | |
| { | |
| "epoch": 2.969970300296997, | |
| "grad_norm": 3.4274468421936035, | |
| "learning_rate": 4.061890612963575e-06, | |
| "loss": 0.4408, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 2.972170278297217, | |
| "grad_norm": 3.6045656204223633, | |
| "learning_rate": 4.057488720149665e-06, | |
| "loss": 0.4439, | |
| "step": 135100 | |
| }, | |
| { | |
| "epoch": 2.974370256297437, | |
| "grad_norm": 3.063582181930542, | |
| "learning_rate": 4.0530868273357545e-06, | |
| "loss": 0.4305, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 2.976570234297657, | |
| "grad_norm": 3.4174232482910156, | |
| "learning_rate": 4.048684934521845e-06, | |
| "loss": 0.4334, | |
| "step": 135300 | |
| }, | |
| { | |
| "epoch": 2.978770212297877, | |
| "grad_norm": 2.2402751445770264, | |
| "learning_rate": 4.044283041707935e-06, | |
| "loss": 0.4376, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 2.980970190298097, | |
| "grad_norm": 3.14042067527771, | |
| "learning_rate": 4.0398811488940245e-06, | |
| "loss": 0.443, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 2.983170168298317, | |
| "grad_norm": 4.143354892730713, | |
| "learning_rate": 4.035479256080115e-06, | |
| "loss": 0.4336, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 2.985370146298537, | |
| "grad_norm": 3.4250524044036865, | |
| "learning_rate": 4.031077363266205e-06, | |
| "loss": 0.4286, | |
| "step": 135700 | |
| }, | |
| { | |
| "epoch": 2.987570124298757, | |
| "grad_norm": 3.041456937789917, | |
| "learning_rate": 4.0266754704522945e-06, | |
| "loss": 0.4296, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 2.989770102298977, | |
| "grad_norm": 2.1302220821380615, | |
| "learning_rate": 4.022273577638385e-06, | |
| "loss": 0.4336, | |
| "step": 135900 | |
| }, | |
| { | |
| "epoch": 2.991970080299197, | |
| "grad_norm": 3.928239107131958, | |
| "learning_rate": 4.017871684824475e-06, | |
| "loss": 0.4352, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 2.994170058299417, | |
| "grad_norm": 2.7161359786987305, | |
| "learning_rate": 4.0134697920105645e-06, | |
| "loss": 0.4409, | |
| "step": 136100 | |
| }, | |
| { | |
| "epoch": 2.996370036299637, | |
| "grad_norm": 2.8443000316619873, | |
| "learning_rate": 4.009067899196655e-06, | |
| "loss": 0.4227, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 2.9985700142998573, | |
| "grad_norm": 2.336637020111084, | |
| "learning_rate": 4.004666006382745e-06, | |
| "loss": 0.4296, | |
| "step": 136300 | |
| }, | |
| { | |
| "epoch": 3.000769992300077, | |
| "grad_norm": 2.747061252593994, | |
| "learning_rate": 4.0002641135688345e-06, | |
| "loss": 0.4243, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 3.0029699703002968, | |
| "grad_norm": 2.6186234951019287, | |
| "learning_rate": 3.995862220754925e-06, | |
| "loss": 0.3946, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 3.005169948300517, | |
| "grad_norm": 3.578420400619507, | |
| "learning_rate": 3.991460327941015e-06, | |
| "loss": 0.3841, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 3.007369926300737, | |
| "grad_norm": 3.9675164222717285, | |
| "learning_rate": 3.987058435127105e-06, | |
| "loss": 0.4017, | |
| "step": 136700 | |
| }, | |
| { | |
| "epoch": 3.009569904300957, | |
| "grad_norm": 3.1522490978240967, | |
| "learning_rate": 3.982656542313195e-06, | |
| "loss": 0.3878, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 3.011769882301177, | |
| "grad_norm": 3.3388068675994873, | |
| "learning_rate": 3.978254649499285e-06, | |
| "loss": 0.4005, | |
| "step": 136900 | |
| }, | |
| { | |
| "epoch": 3.013969860301397, | |
| "grad_norm": 3.6714820861816406, | |
| "learning_rate": 3.973852756685375e-06, | |
| "loss": 0.406, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 3.016169838301617, | |
| "grad_norm": 2.9617388248443604, | |
| "learning_rate": 3.969450863871465e-06, | |
| "loss": 0.3915, | |
| "step": 137100 | |
| }, | |
| { | |
| "epoch": 3.018369816301837, | |
| "grad_norm": 3.648895263671875, | |
| "learning_rate": 3.965048971057555e-06, | |
| "loss": 0.3919, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 3.020569794302057, | |
| "grad_norm": 3.127763509750366, | |
| "learning_rate": 3.960647078243645e-06, | |
| "loss": 0.3969, | |
| "step": 137300 | |
| }, | |
| { | |
| "epoch": 3.022769772302277, | |
| "grad_norm": 4.054533004760742, | |
| "learning_rate": 3.956245185429735e-06, | |
| "loss": 0.4023, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 3.024969750302497, | |
| "grad_norm": 3.8178627490997314, | |
| "learning_rate": 3.951843292615825e-06, | |
| "loss": 0.3848, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 3.027169728302717, | |
| "grad_norm": 3.450464963912964, | |
| "learning_rate": 3.947441399801915e-06, | |
| "loss": 0.3983, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 3.029369706302937, | |
| "grad_norm": 3.210991144180298, | |
| "learning_rate": 3.943039506988005e-06, | |
| "loss": 0.3993, | |
| "step": 137700 | |
| }, | |
| { | |
| "epoch": 3.031569684303157, | |
| "grad_norm": 3.706838369369507, | |
| "learning_rate": 3.938637614174095e-06, | |
| "loss": 0.3916, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 3.033769662303377, | |
| "grad_norm": 3.306352138519287, | |
| "learning_rate": 3.934235721360185e-06, | |
| "loss": 0.404, | |
| "step": 137900 | |
| }, | |
| { | |
| "epoch": 3.035969640303597, | |
| "grad_norm": 4.016099452972412, | |
| "learning_rate": 3.929833828546275e-06, | |
| "loss": 0.4058, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 3.038169618303817, | |
| "grad_norm": 3.057190418243408, | |
| "learning_rate": 3.925431935732365e-06, | |
| "loss": 0.4105, | |
| "step": 138100 | |
| }, | |
| { | |
| "epoch": 3.040369596304037, | |
| "grad_norm": 3.2745933532714844, | |
| "learning_rate": 3.921030042918455e-06, | |
| "loss": 0.3888, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 3.042569574304257, | |
| "grad_norm": 3.0036211013793945, | |
| "learning_rate": 3.916628150104545e-06, | |
| "loss": 0.4039, | |
| "step": 138300 | |
| }, | |
| { | |
| "epoch": 3.044769552304477, | |
| "grad_norm": 3.3982667922973633, | |
| "learning_rate": 3.912226257290636e-06, | |
| "loss": 0.3955, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 3.046969530304697, | |
| "grad_norm": 2.8318960666656494, | |
| "learning_rate": 3.907824364476725e-06, | |
| "loss": 0.406, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 3.049169508304917, | |
| "grad_norm": 2.669373035430908, | |
| "learning_rate": 3.903422471662816e-06, | |
| "loss": 0.4009, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 3.051369486305137, | |
| "grad_norm": 3.476454257965088, | |
| "learning_rate": 3.899020578848906e-06, | |
| "loss": 0.3958, | |
| "step": 138700 | |
| }, | |
| { | |
| "epoch": 3.053569464305357, | |
| "grad_norm": 3.4908926486968994, | |
| "learning_rate": 3.894618686034995e-06, | |
| "loss": 0.4008, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 3.055769442305577, | |
| "grad_norm": 3.15459942817688, | |
| "learning_rate": 3.890216793221086e-06, | |
| "loss": 0.3928, | |
| "step": 138900 | |
| }, | |
| { | |
| "epoch": 3.0579694203057968, | |
| "grad_norm": 3.2345471382141113, | |
| "learning_rate": 3.885814900407175e-06, | |
| "loss": 0.4051, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 3.0601693983060168, | |
| "grad_norm": 3.2796826362609863, | |
| "learning_rate": 3.881413007593265e-06, | |
| "loss": 0.3985, | |
| "step": 139100 | |
| }, | |
| { | |
| "epoch": 3.0623693763062367, | |
| "grad_norm": 3.1456501483917236, | |
| "learning_rate": 3.877011114779356e-06, | |
| "loss": 0.4006, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 3.0645693543064567, | |
| "grad_norm": 3.5905213356018066, | |
| "learning_rate": 3.872609221965445e-06, | |
| "loss": 0.4005, | |
| "step": 139300 | |
| }, | |
| { | |
| "epoch": 3.066769332306677, | |
| "grad_norm": 3.593623399734497, | |
| "learning_rate": 3.868207329151535e-06, | |
| "loss": 0.3984, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 3.068969310306897, | |
| "grad_norm": 3.059357166290283, | |
| "learning_rate": 3.863805436337626e-06, | |
| "loss": 0.4105, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 3.071169288307117, | |
| "grad_norm": 3.4862234592437744, | |
| "learning_rate": 3.859403543523715e-06, | |
| "loss": 0.3943, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 3.073369266307337, | |
| "grad_norm": 3.381134033203125, | |
| "learning_rate": 3.855001650709805e-06, | |
| "loss": 0.3865, | |
| "step": 139700 | |
| }, | |
| { | |
| "epoch": 3.075569244307557, | |
| "grad_norm": 3.13862681388855, | |
| "learning_rate": 3.850599757895896e-06, | |
| "loss": 0.3895, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 3.077769222307777, | |
| "grad_norm": 3.6578209400177, | |
| "learning_rate": 3.846197865081985e-06, | |
| "loss": 0.3972, | |
| "step": 139900 | |
| }, | |
| { | |
| "epoch": 3.079969200307997, | |
| "grad_norm": 3.353710174560547, | |
| "learning_rate": 3.841795972268075e-06, | |
| "loss": 0.3935, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 3.082169178308217, | |
| "grad_norm": 3.3863718509674072, | |
| "learning_rate": 3.837394079454166e-06, | |
| "loss": 0.401, | |
| "step": 140100 | |
| }, | |
| { | |
| "epoch": 3.084369156308437, | |
| "grad_norm": 3.574791669845581, | |
| "learning_rate": 3.832992186640256e-06, | |
| "loss": 0.3932, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 3.086569134308657, | |
| "grad_norm": 3.1452407836914062, | |
| "learning_rate": 3.828590293826346e-06, | |
| "loss": 0.3982, | |
| "step": 140300 | |
| }, | |
| { | |
| "epoch": 3.088769112308877, | |
| "grad_norm": 3.7013602256774902, | |
| "learning_rate": 3.824188401012436e-06, | |
| "loss": 0.3871, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 3.090969090309097, | |
| "grad_norm": 3.909804105758667, | |
| "learning_rate": 3.819786508198526e-06, | |
| "loss": 0.3843, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 3.093169068309317, | |
| "grad_norm": 3.576997995376587, | |
| "learning_rate": 3.815384615384616e-06, | |
| "loss": 0.3895, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 3.095369046309537, | |
| "grad_norm": 3.9983808994293213, | |
| "learning_rate": 3.8109827225707056e-06, | |
| "loss": 0.3864, | |
| "step": 140700 | |
| }, | |
| { | |
| "epoch": 3.097569024309757, | |
| "grad_norm": 3.6163980960845947, | |
| "learning_rate": 3.806580829756796e-06, | |
| "loss": 0.3903, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 3.099769002309977, | |
| "grad_norm": 3.2333366870880127, | |
| "learning_rate": 3.8021789369428858e-06, | |
| "loss": 0.3939, | |
| "step": 140900 | |
| }, | |
| { | |
| "epoch": 3.101968980310197, | |
| "grad_norm": 3.2053537368774414, | |
| "learning_rate": 3.7977770441289756e-06, | |
| "loss": 0.3983, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 3.104168958310417, | |
| "grad_norm": 3.423635721206665, | |
| "learning_rate": 3.793375151315066e-06, | |
| "loss": 0.3937, | |
| "step": 141100 | |
| }, | |
| { | |
| "epoch": 3.106368936310637, | |
| "grad_norm": 3.9658424854278564, | |
| "learning_rate": 3.7889732585011558e-06, | |
| "loss": 0.3952, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 3.108568914310857, | |
| "grad_norm": 3.184368848800659, | |
| "learning_rate": 3.7845713656872456e-06, | |
| "loss": 0.3854, | |
| "step": 141300 | |
| }, | |
| { | |
| "epoch": 3.110768892311077, | |
| "grad_norm": 3.8191583156585693, | |
| "learning_rate": 3.7801694728733355e-06, | |
| "loss": 0.3965, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 3.1129688703112968, | |
| "grad_norm": 3.2895469665527344, | |
| "learning_rate": 3.7757675800594258e-06, | |
| "loss": 0.4101, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 3.1151688483115167, | |
| "grad_norm": 3.220507860183716, | |
| "learning_rate": 3.7713656872455156e-06, | |
| "loss": 0.3977, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 3.1173688263117367, | |
| "grad_norm": 3.0123960971832275, | |
| "learning_rate": 3.7669637944316055e-06, | |
| "loss": 0.4042, | |
| "step": 141700 | |
| }, | |
| { | |
| "epoch": 3.1195688043119567, | |
| "grad_norm": 3.4893038272857666, | |
| "learning_rate": 3.762561901617696e-06, | |
| "loss": 0.4052, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 3.1217687823121767, | |
| "grad_norm": 3.987698793411255, | |
| "learning_rate": 3.758160008803786e-06, | |
| "loss": 0.3979, | |
| "step": 141900 | |
| }, | |
| { | |
| "epoch": 3.1239687603123967, | |
| "grad_norm": 3.5184412002563477, | |
| "learning_rate": 3.7537581159898763e-06, | |
| "loss": 0.4114, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 3.126168738312617, | |
| "grad_norm": 4.024544715881348, | |
| "learning_rate": 3.749356223175966e-06, | |
| "loss": 0.3996, | |
| "step": 142100 | |
| }, | |
| { | |
| "epoch": 3.1283687163128366, | |
| "grad_norm": 3.358962059020996, | |
| "learning_rate": 3.744954330362056e-06, | |
| "loss": 0.3981, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 3.130568694313057, | |
| "grad_norm": 2.8024742603302, | |
| "learning_rate": 3.7405524375481463e-06, | |
| "loss": 0.402, | |
| "step": 142300 | |
| }, | |
| { | |
| "epoch": 3.132768672313277, | |
| "grad_norm": 3.1832938194274902, | |
| "learning_rate": 3.736150544734236e-06, | |
| "loss": 0.3951, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 3.134968650313497, | |
| "grad_norm": 3.7349486351013184, | |
| "learning_rate": 3.731748651920326e-06, | |
| "loss": 0.3911, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 3.137168628313717, | |
| "grad_norm": 3.722146987915039, | |
| "learning_rate": 3.727346759106416e-06, | |
| "loss": 0.3991, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 3.139368606313937, | |
| "grad_norm": 3.13198184967041, | |
| "learning_rate": 3.722944866292506e-06, | |
| "loss": 0.3937, | |
| "step": 142700 | |
| }, | |
| { | |
| "epoch": 3.141568584314157, | |
| "grad_norm": 3.511359453201294, | |
| "learning_rate": 3.718542973478596e-06, | |
| "loss": 0.3989, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 3.143768562314377, | |
| "grad_norm": 4.363528251647949, | |
| "learning_rate": 3.714141080664686e-06, | |
| "loss": 0.4041, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 3.145968540314597, | |
| "grad_norm": 3.696638584136963, | |
| "learning_rate": 3.709739187850776e-06, | |
| "loss": 0.3971, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 3.148168518314817, | |
| "grad_norm": 3.170654058456421, | |
| "learning_rate": 3.705337295036866e-06, | |
| "loss": 0.4002, | |
| "step": 143100 | |
| }, | |
| { | |
| "epoch": 3.150368496315037, | |
| "grad_norm": 4.185492992401123, | |
| "learning_rate": 3.700935402222956e-06, | |
| "loss": 0.3912, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 3.152568474315257, | |
| "grad_norm": 3.829686403274536, | |
| "learning_rate": 3.696533509409046e-06, | |
| "loss": 0.3914, | |
| "step": 143300 | |
| }, | |
| { | |
| "epoch": 3.154768452315477, | |
| "grad_norm": 2.9345476627349854, | |
| "learning_rate": 3.692131616595136e-06, | |
| "loss": 0.3962, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 3.156968430315697, | |
| "grad_norm": 3.666574239730835, | |
| "learning_rate": 3.687729723781226e-06, | |
| "loss": 0.401, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 3.159168408315917, | |
| "grad_norm": 3.359739303588867, | |
| "learning_rate": 3.6833278309673166e-06, | |
| "loss": 0.3992, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 3.161368386316137, | |
| "grad_norm": 4.227367877960205, | |
| "learning_rate": 3.6789259381534065e-06, | |
| "loss": 0.401, | |
| "step": 143700 | |
| }, | |
| { | |
| "epoch": 3.163568364316357, | |
| "grad_norm": 2.7858405113220215, | |
| "learning_rate": 3.6745240453394963e-06, | |
| "loss": 0.3907, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 3.1657683423165768, | |
| "grad_norm": 3.08479905128479, | |
| "learning_rate": 3.6701221525255866e-06, | |
| "loss": 0.395, | |
| "step": 143900 | |
| }, | |
| { | |
| "epoch": 3.1679683203167968, | |
| "grad_norm": 3.606621265411377, | |
| "learning_rate": 3.6657202597116765e-06, | |
| "loss": 0.3916, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 3.1701682983170167, | |
| "grad_norm": 4.141706466674805, | |
| "learning_rate": 3.6613183668977663e-06, | |
| "loss": 0.3987, | |
| "step": 144100 | |
| }, | |
| { | |
| "epoch": 3.1723682763172367, | |
| "grad_norm": 3.2608320713043213, | |
| "learning_rate": 3.6569164740838566e-06, | |
| "loss": 0.4112, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 3.1745682543174567, | |
| "grad_norm": 3.0765554904937744, | |
| "learning_rate": 3.6525145812699465e-06, | |
| "loss": 0.4057, | |
| "step": 144300 | |
| }, | |
| { | |
| "epoch": 3.1767682323176767, | |
| "grad_norm": 3.198472738265991, | |
| "learning_rate": 3.6481126884560363e-06, | |
| "loss": 0.3928, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 3.1789682103178967, | |
| "grad_norm": 3.3553693294525146, | |
| "learning_rate": 3.643710795642126e-06, | |
| "loss": 0.3934, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 3.1811681883181167, | |
| "grad_norm": 3.7630527019500732, | |
| "learning_rate": 3.6393089028282165e-06, | |
| "loss": 0.3957, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 3.1833681663183366, | |
| "grad_norm": 3.3282408714294434, | |
| "learning_rate": 3.6349070100143063e-06, | |
| "loss": 0.4038, | |
| "step": 144700 | |
| }, | |
| { | |
| "epoch": 3.1855681443185566, | |
| "grad_norm": 3.896204710006714, | |
| "learning_rate": 3.630505117200396e-06, | |
| "loss": 0.4176, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 3.1877681223187766, | |
| "grad_norm": 3.9070045948028564, | |
| "learning_rate": 3.6261032243864865e-06, | |
| "loss": 0.394, | |
| "step": 144900 | |
| }, | |
| { | |
| "epoch": 3.189968100318997, | |
| "grad_norm": 4.341803073883057, | |
| "learning_rate": 3.6217013315725763e-06, | |
| "loss": 0.4016, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 3.192168078319217, | |
| "grad_norm": 3.0518646240234375, | |
| "learning_rate": 3.617299438758666e-06, | |
| "loss": 0.4021, | |
| "step": 145100 | |
| }, | |
| { | |
| "epoch": 3.194368056319437, | |
| "grad_norm": 2.9907262325286865, | |
| "learning_rate": 3.6128975459447565e-06, | |
| "loss": 0.401, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 3.196568034319657, | |
| "grad_norm": 3.994093894958496, | |
| "learning_rate": 3.6084956531308467e-06, | |
| "loss": 0.3869, | |
| "step": 145300 | |
| }, | |
| { | |
| "epoch": 3.198768012319877, | |
| "grad_norm": 4.31938362121582, | |
| "learning_rate": 3.6040937603169366e-06, | |
| "loss": 0.3942, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 3.200967990320097, | |
| "grad_norm": 3.5487558841705322, | |
| "learning_rate": 3.599691867503027e-06, | |
| "loss": 0.3958, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 3.203167968320317, | |
| "grad_norm": 4.53445291519165, | |
| "learning_rate": 3.5952899746891167e-06, | |
| "loss": 0.3962, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 3.205367946320537, | |
| "grad_norm": 3.816943645477295, | |
| "learning_rate": 3.5908880818752066e-06, | |
| "loss": 0.4112, | |
| "step": 145700 | |
| }, | |
| { | |
| "epoch": 3.207567924320757, | |
| "grad_norm": 4.102901935577393, | |
| "learning_rate": 3.586486189061297e-06, | |
| "loss": 0.3914, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 3.209767902320977, | |
| "grad_norm": 3.5486576557159424, | |
| "learning_rate": 3.5820842962473867e-06, | |
| "loss": 0.3993, | |
| "step": 145900 | |
| }, | |
| { | |
| "epoch": 3.211967880321197, | |
| "grad_norm": 3.8645424842834473, | |
| "learning_rate": 3.5776824034334766e-06, | |
| "loss": 0.4078, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 3.214167858321417, | |
| "grad_norm": 3.282376527786255, | |
| "learning_rate": 3.573280510619567e-06, | |
| "loss": 0.4007, | |
| "step": 146100 | |
| }, | |
| { | |
| "epoch": 3.216367836321637, | |
| "grad_norm": 3.293292999267578, | |
| "learning_rate": 3.5688786178056567e-06, | |
| "loss": 0.3909, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 3.218567814321857, | |
| "grad_norm": 3.7592716217041016, | |
| "learning_rate": 3.5644767249917466e-06, | |
| "loss": 0.3916, | |
| "step": 146300 | |
| }, | |
| { | |
| "epoch": 3.2207677923220768, | |
| "grad_norm": 3.8671295642852783, | |
| "learning_rate": 3.5600748321778365e-06, | |
| "loss": 0.4028, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 3.2229677703222968, | |
| "grad_norm": 3.9341673851013184, | |
| "learning_rate": 3.5556729393639267e-06, | |
| "loss": 0.3957, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 3.2251677483225167, | |
| "grad_norm": 3.232847213745117, | |
| "learning_rate": 3.5512710465500166e-06, | |
| "loss": 0.39, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 3.2273677263227367, | |
| "grad_norm": 3.2240495681762695, | |
| "learning_rate": 3.5468691537361065e-06, | |
| "loss": 0.3947, | |
| "step": 146700 | |
| }, | |
| { | |
| "epoch": 3.2295677043229567, | |
| "grad_norm": 3.527489423751831, | |
| "learning_rate": 3.5424672609221967e-06, | |
| "loss": 0.3968, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 3.2317676823231767, | |
| "grad_norm": 3.633652687072754, | |
| "learning_rate": 3.5380653681082866e-06, | |
| "loss": 0.3883, | |
| "step": 146900 | |
| }, | |
| { | |
| "epoch": 3.2339676603233967, | |
| "grad_norm": 3.5555477142333984, | |
| "learning_rate": 3.5336634752943773e-06, | |
| "loss": 0.3965, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 3.2361676383236166, | |
| "grad_norm": 3.5308194160461426, | |
| "learning_rate": 3.529261582480467e-06, | |
| "loss": 0.3881, | |
| "step": 147100 | |
| }, | |
| { | |
| "epoch": 3.2383676163238366, | |
| "grad_norm": 4.819995880126953, | |
| "learning_rate": 3.524859689666557e-06, | |
| "loss": 0.4036, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 3.2405675943240566, | |
| "grad_norm": 4.3929033279418945, | |
| "learning_rate": 3.5204577968526473e-06, | |
| "loss": 0.3931, | |
| "step": 147300 | |
| }, | |
| { | |
| "epoch": 3.2427675723242766, | |
| "grad_norm": 2.9747936725616455, | |
| "learning_rate": 3.516055904038737e-06, | |
| "loss": 0.3986, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 3.2449675503244966, | |
| "grad_norm": 3.78959321975708, | |
| "learning_rate": 3.511654011224827e-06, | |
| "loss": 0.4019, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 3.2471675283247166, | |
| "grad_norm": 3.943894624710083, | |
| "learning_rate": 3.507252118410917e-06, | |
| "loss": 0.3924, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 3.249367506324937, | |
| "grad_norm": 3.963569402694702, | |
| "learning_rate": 3.502850225597007e-06, | |
| "loss": 0.4118, | |
| "step": 147700 | |
| }, | |
| { | |
| "epoch": 3.2515674843251565, | |
| "grad_norm": 3.0138792991638184, | |
| "learning_rate": 3.498448332783097e-06, | |
| "loss": 0.3914, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 3.253767462325377, | |
| "grad_norm": 3.723484754562378, | |
| "learning_rate": 3.494046439969187e-06, | |
| "loss": 0.3847, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 3.255967440325597, | |
| "grad_norm": 3.7332823276519775, | |
| "learning_rate": 3.489644547155277e-06, | |
| "loss": 0.3943, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 3.258167418325817, | |
| "grad_norm": 4.216028690338135, | |
| "learning_rate": 3.485242654341367e-06, | |
| "loss": 0.3959, | |
| "step": 148100 | |
| }, | |
| { | |
| "epoch": 3.260367396326037, | |
| "grad_norm": 2.8157236576080322, | |
| "learning_rate": 3.480840761527457e-06, | |
| "loss": 0.4163, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 3.262567374326257, | |
| "grad_norm": 3.428497076034546, | |
| "learning_rate": 3.476438868713547e-06, | |
| "loss": 0.4064, | |
| "step": 148300 | |
| }, | |
| { | |
| "epoch": 3.264767352326477, | |
| "grad_norm": 3.9073712825775146, | |
| "learning_rate": 3.472036975899637e-06, | |
| "loss": 0.4014, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 3.266967330326697, | |
| "grad_norm": 4.03035306930542, | |
| "learning_rate": 3.467635083085727e-06, | |
| "loss": 0.3991, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 3.269167308326917, | |
| "grad_norm": 2.9807870388031006, | |
| "learning_rate": 3.4632331902718167e-06, | |
| "loss": 0.4106, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 3.271367286327137, | |
| "grad_norm": 2.9369282722473145, | |
| "learning_rate": 3.458831297457907e-06, | |
| "loss": 0.4048, | |
| "step": 148700 | |
| }, | |
| { | |
| "epoch": 3.273567264327357, | |
| "grad_norm": 3.9154739379882812, | |
| "learning_rate": 3.4544294046439973e-06, | |
| "loss": 0.3984, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 3.2757672423275768, | |
| "grad_norm": 4.0573601722717285, | |
| "learning_rate": 3.4500275118300876e-06, | |
| "loss": 0.3994, | |
| "step": 148900 | |
| }, | |
| { | |
| "epoch": 3.2779672203277967, | |
| "grad_norm": 3.521632671356201, | |
| "learning_rate": 3.4456256190161774e-06, | |
| "loss": 0.4101, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 3.2801671983280167, | |
| "grad_norm": 3.2614357471466064, | |
| "learning_rate": 3.4412237262022673e-06, | |
| "loss": 0.404, | |
| "step": 149100 | |
| }, | |
| { | |
| "epoch": 3.2823671763282367, | |
| "grad_norm": 2.79972767829895, | |
| "learning_rate": 3.4368218333883576e-06, | |
| "loss": 0.3997, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 3.2845671543284567, | |
| "grad_norm": 3.753329038619995, | |
| "learning_rate": 3.4324199405744474e-06, | |
| "loss": 0.3992, | |
| "step": 149300 | |
| }, | |
| { | |
| "epoch": 3.2867671323286767, | |
| "grad_norm": 2.569004774093628, | |
| "learning_rate": 3.4280180477605373e-06, | |
| "loss": 0.4007, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 3.2889671103288967, | |
| "grad_norm": 3.0934865474700928, | |
| "learning_rate": 3.423616154946627e-06, | |
| "loss": 0.3978, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 3.2911670883291166, | |
| "grad_norm": 3.7940945625305176, | |
| "learning_rate": 3.4192142621327174e-06, | |
| "loss": 0.4009, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 3.2933670663293366, | |
| "grad_norm": 3.0418498516082764, | |
| "learning_rate": 3.4148123693188073e-06, | |
| "loss": 0.3977, | |
| "step": 149700 | |
| }, | |
| { | |
| "epoch": 3.2955670443295566, | |
| "grad_norm": 3.6287832260131836, | |
| "learning_rate": 3.410410476504897e-06, | |
| "loss": 0.399, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 3.2977670223297766, | |
| "grad_norm": 3.9616570472717285, | |
| "learning_rate": 3.4060085836909874e-06, | |
| "loss": 0.392, | |
| "step": 149900 | |
| }, | |
| { | |
| "epoch": 3.2999670003299966, | |
| "grad_norm": 3.6250250339508057, | |
| "learning_rate": 3.4016066908770773e-06, | |
| "loss": 0.3837, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 3.2999670003299966, | |
| "eval_loss": 0.5354483723640442, | |
| "eval_runtime": 386.9888, | |
| "eval_samples_per_second": 155.043, | |
| "eval_steps_per_second": 4.845, | |
| "step": 150000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 227275, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 50000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.913014241035995e+17, | |
| "train_batch_size": 22, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |