English
glove
lora
distillation
hard-negatives
qkv-split
jsanzolac's picture
Upload rank_300/train_log.jsonl with huggingface_hub
68f7fc3 verified
{"step": 100, "loss": 6.271974563598633, "loss_nce": 6.271329879760742, "loss_mse": 0.006446883548051119, "lr": 5e-05, "grad_norm": 0.05120902508497238, "wall_ms": 1650}
{"step": 200, "loss": 6.100264072418213, "loss_nce": 6.099607467651367, "loss_mse": 0.006567754317075014, "lr": 0.0001, "grad_norm": 0.10592078417539597, "wall_ms": 2890}
{"step": 300, "loss": 5.910880088806152, "loss_nce": 5.910226821899414, "loss_mse": 0.006534266751259565, "lr": 0.00015, "grad_norm": 0.096257783472538, "wall_ms": 4128}
{"step": 400, "loss": 5.564651966094971, "loss_nce": 5.564016342163086, "loss_mse": 0.00635649636387825, "lr": 0.0002, "grad_norm": 0.10613586753606796, "wall_ms": 5369}
{"step": 500, "loss": 5.1782450675964355, "loss_nce": 5.1776323318481445, "loss_mse": 0.006129041314125061, "lr": 0.00025, "grad_norm": 0.12485259771347046, "wall_ms": 6611}
{"step": 600, "loss": 4.916259288787842, "loss_nce": 4.91567325592041, "loss_mse": 0.005862667225301266, "lr": 0.0003, "grad_norm": 0.15372514724731445, "wall_ms": 7852}
{"step": 700, "loss": 4.595059394836426, "loss_nce": 4.594505310058594, "loss_mse": 0.0055415015667676926, "lr": 0.00035, "grad_norm": 0.1912592202425003, "wall_ms": 9093}
{"step": 800, "loss": 4.303678035736084, "loss_nce": 4.30314826965332, "loss_mse": 0.005296396557241678, "lr": 0.0004, "grad_norm": 0.18782269954681396, "wall_ms": 10333}
{"step": 900, "loss": 4.109781265258789, "loss_nce": 4.109272003173828, "loss_mse": 0.0050936066545546055, "lr": 0.00045000000000000004, "grad_norm": 0.19032245874404907, "wall_ms": 11573}
{"step": 1000, "loss": 3.957948684692383, "loss_nce": 3.9574594497680664, "loss_mse": 0.0048912763595581055, "lr": 0.0005, "grad_norm": 0.18928201496601105, "wall_ms": 12816}
{"step": 1100, "loss": 3.8569185733795166, "loss_nce": 3.8564326763153076, "loss_mse": 0.004859223496168852, "lr": 0.0004999994554181833, "grad_norm": 0.2015782594680786, "wall_ms": 14057}
{"step": 1200, "loss": 3.69417667388916, "loss_nce": 3.6936960220336914, "loss_mse": 0.004807649180293083, "lr": 0.0004999978216751542, "grad_norm": 0.21180157363414764, "wall_ms": 15296}
{"step": 1300, "loss": 3.5065858364105225, "loss_nce": 3.50612211227417, "loss_mse": 0.004638359881937504, "lr": 0.0004999950987781756, "grad_norm": 0.20782871544361115, "wall_ms": 16537}
{"step": 1400, "loss": 3.37308406829834, "loss_nce": 3.3726248741149902, "loss_mse": 0.004591515753418207, "lr": 0.0004999912867393524, "grad_norm": 0.21717792749404907, "wall_ms": 17777}
{"step": 1500, "loss": 3.3119609355926514, "loss_nce": 3.3115086555480957, "loss_mse": 0.004522266332060099, "lr": 0.0004999863855756311, "grad_norm": 0.2167711704969406, "wall_ms": 19013}
{"step": 1600, "loss": 3.193453788757324, "loss_nce": 3.1930012702941895, "loss_mse": 0.004524202551692724, "lr": 0.0004999803953088003, "grad_norm": 0.21516193449497223, "wall_ms": 20253}
{"step": 1700, "loss": 3.1398820877075195, "loss_nce": 3.139435291290283, "loss_mse": 0.00446693692356348, "lr": 0.00049997331596549, "grad_norm": 0.20427413284778595, "wall_ms": 21494}
{"step": 1800, "loss": 3.0724375247955322, "loss_nce": 3.0719945430755615, "loss_mse": 0.0044294120743870735, "lr": 0.000499965147577172, "grad_norm": 0.2230377346277237, "wall_ms": 22752}
{"step": 1900, "loss": 2.8865482807159424, "loss_nce": 2.886124610900879, "loss_mse": 0.004237017128616571, "lr": 0.0004999558901801592, "grad_norm": 0.2124606966972351, "wall_ms": 24009}
{"step": 2000, "loss": 2.9701640605926514, "loss_nce": 2.9697325229644775, "loss_mse": 0.004314932972192764, "lr": 0.0004999455438156064, "grad_norm": 0.21987958252429962, "wall_ms": 25262}
{"step": 2100, "loss": 2.803147315979004, "loss_nce": 2.8027260303497314, "loss_mse": 0.004211922641843557, "lr": 0.0004999341085295087, "grad_norm": 0.23636235296726227, "wall_ms": 26524}
{"step": 2200, "loss": 2.7278928756713867, "loss_nce": 2.727468967437744, "loss_mse": 0.00423823855817318, "lr": 0.0004999215843727024, "grad_norm": 0.2229534536600113, "wall_ms": 27783}
{"step": 2300, "loss": 2.636695384979248, "loss_nce": 2.636282444000244, "loss_mse": 0.00412943959236145, "lr": 0.0004999079714008647, "grad_norm": 0.2374623715877533, "wall_ms": 29033}
{"step": 2400, "loss": 2.614652395248413, "loss_nce": 2.6142361164093018, "loss_mse": 0.004162426572293043, "lr": 0.0004998932696745126, "grad_norm": 0.24874071776866913, "wall_ms": 30295}
{"step": 2500, "loss": 2.5497217178344727, "loss_nce": 2.549316883087158, "loss_mse": 0.004049354698508978, "lr": 0.000499877479259004, "grad_norm": 0.24955017864704132, "wall_ms": 31558}
{"step": 2600, "loss": 2.4040639400482178, "loss_nce": 2.403663158416748, "loss_mse": 0.004007314331829548, "lr": 0.000499860600224536, "grad_norm": 0.24248209595680237, "wall_ms": 32800}
{"step": 2700, "loss": 2.338850498199463, "loss_nce": 2.338451862335205, "loss_mse": 0.003986808937042952, "lr": 0.0004998426326461454, "grad_norm": 0.26135334372520447, "wall_ms": 34066}
{"step": 2800, "loss": 2.232278347015381, "loss_nce": 2.231881856918335, "loss_mse": 0.003965795040130615, "lr": 0.0004998235766037085, "grad_norm": 0.24719488620758057, "wall_ms": 35336}
{"step": 2900, "loss": 2.220489501953125, "loss_nce": 2.2200989723205566, "loss_mse": 0.0039058479014784098, "lr": 0.00049980343218194, "grad_norm": 0.2605956196784973, "wall_ms": 36605}
{"step": 3000, "loss": 2.163710832595825, "loss_nce": 2.163323163986206, "loss_mse": 0.0038771447725594044, "lr": 0.0004997821994703933, "grad_norm": 0.2547266483306885, "wall_ms": 37872}
{"step": 3100, "loss": 2.0446665287017822, "loss_nce": 2.044290542602539, "loss_mse": 0.0037592221051454544, "lr": 0.0004997598785634597, "grad_norm": 0.25110045075416565, "wall_ms": 39137}
{"step": 3200, "loss": 2.0050718784332275, "loss_nce": 2.0046944618225098, "loss_mse": 0.00377437355928123, "lr": 0.0004997364695603685, "grad_norm": 0.2446262687444687, "wall_ms": 40398}
{"step": 3300, "loss": 1.9194613695144653, "loss_nce": 1.919084072113037, "loss_mse": 0.0037734995130449533, "lr": 0.0004997119725651858, "grad_norm": 0.2699510455131531, "wall_ms": 41676}
{"step": 3400, "loss": 1.924654245376587, "loss_nce": 1.9242711067199707, "loss_mse": 0.0038313011173158884, "lr": 0.0004996863876868148, "grad_norm": 0.25902172923088074, "wall_ms": 42927}
{"step": 3500, "loss": 1.8198233842849731, "loss_nce": 1.8194541931152344, "loss_mse": 0.0036917910911142826, "lr": 0.0004996597150389944, "grad_norm": 0.25488367676734924, "wall_ms": 44174}
{"step": 3600, "loss": 1.8551217317581177, "loss_nce": 1.8547472953796387, "loss_mse": 0.0037446848582476377, "lr": 0.0004996319547402999, "grad_norm": 0.2606346309185028, "wall_ms": 45418}
{"step": 3700, "loss": 1.7473217248916626, "loss_nce": 1.7469558715820312, "loss_mse": 0.00365842436440289, "lr": 0.0004996031069141414, "grad_norm": 0.26426243782043457, "wall_ms": 46659}
{"step": 3800, "loss": 1.6352217197418213, "loss_nce": 1.6348583698272705, "loss_mse": 0.0036335294134914875, "lr": 0.000499573171688764, "grad_norm": 0.2713885009288788, "wall_ms": 47900}
{"step": 3900, "loss": 1.6203269958496094, "loss_nce": 1.6199699640274048, "loss_mse": 0.00357077200897038, "lr": 0.0004995421491972465, "grad_norm": 0.23793157935142517, "wall_ms": 49136}
{"step": 4000, "loss": 1.559437870979309, "loss_nce": 1.5590823888778687, "loss_mse": 0.003554755123332143, "lr": 0.0004995100395775015, "grad_norm": 0.2707432210445404, "wall_ms": 50373}
{"step": 4100, "loss": 1.51503324508667, "loss_nce": 1.5146814584732056, "loss_mse": 0.003518385114148259, "lr": 0.0004994768429722744, "grad_norm": 0.2706737220287323, "wall_ms": 51608}
{"step": 4200, "loss": 1.4826289415359497, "loss_nce": 1.4822776317596436, "loss_mse": 0.0035128986928611994, "lr": 0.0004994425595291432, "grad_norm": 0.2419971525669098, "wall_ms": 52855}
{"step": 4300, "loss": 1.5180779695510864, "loss_nce": 1.5177249908447266, "loss_mse": 0.0035296031273901463, "lr": 0.0004994071894005168, "grad_norm": 0.2578548192977905, "wall_ms": 54097}
{"step": 4400, "loss": 1.4410600662231445, "loss_nce": 1.4407167434692383, "loss_mse": 0.0034335507079958916, "lr": 0.0004993707327436355, "grad_norm": 0.25917932391166687, "wall_ms": 55338}
{"step": 4500, "loss": 1.3685344457626343, "loss_nce": 1.368189811706543, "loss_mse": 0.0034457684960216284, "lr": 0.0004993331897205698, "grad_norm": 0.25349780917167664, "wall_ms": 56576}
{"step": 4600, "loss": 1.3761457204818726, "loss_nce": 1.3758076429367065, "loss_mse": 0.003381312359124422, "lr": 0.0004992945604982198, "grad_norm": 0.2735377550125122, "wall_ms": 57815}
{"step": 4700, "loss": 1.2878377437591553, "loss_nce": 1.2875005006790161, "loss_mse": 0.003372671315446496, "lr": 0.0004992548452483141, "grad_norm": 0.24354733526706696, "wall_ms": 59055}
{"step": 4800, "loss": 1.249808669090271, "loss_nce": 1.2494691610336304, "loss_mse": 0.0033955122344195843, "lr": 0.0004992140441474097, "grad_norm": 0.24479731917381287, "wall_ms": 60294}
{"step": 4900, "loss": 1.2931571006774902, "loss_nce": 1.2928193807601929, "loss_mse": 0.0033777665812522173, "lr": 0.0004991721573768904, "grad_norm": 0.25774917006492615, "wall_ms": 61533}
{"step": 5000, "loss": 1.1517884731292725, "loss_nce": 1.1514623165130615, "loss_mse": 0.00326116057112813, "lr": 0.0004991291851229665, "grad_norm": 0.233129620552063, "wall_ms": 62772}
{"step": 5100, "loss": 1.1492472887039185, "loss_nce": 1.1489109992980957, "loss_mse": 0.0033632880076766014, "lr": 0.0004990851275766741, "grad_norm": 0.2481987476348877, "wall_ms": 64012}
{"step": 5200, "loss": 1.1176015138626099, "loss_nce": 1.1172726154327393, "loss_mse": 0.0032887260895222425, "lr": 0.000499039984933874, "grad_norm": 0.23515497148036957, "wall_ms": 65251}
{"step": 5300, "loss": 1.1792793273925781, "loss_nce": 1.1789493560791016, "loss_mse": 0.003299447475001216, "lr": 0.0004989937573952507, "grad_norm": 0.24888379871845245, "wall_ms": 66490}
{"step": 5400, "loss": 1.166658878326416, "loss_nce": 1.1663323640823364, "loss_mse": 0.003264589235186577, "lr": 0.0004989464451663118, "grad_norm": 0.2578488886356354, "wall_ms": 67727}
{"step": 5500, "loss": 1.0593770742416382, "loss_nce": 1.059046983718872, "loss_mse": 0.0033011201303452253, "lr": 0.0004988980484573869, "grad_norm": 0.2597412168979645, "wall_ms": 68964}
{"step": 5600, "loss": 1.0194668769836426, "loss_nce": 1.0191450119018555, "loss_mse": 0.003218550467863679, "lr": 0.0004988485674836267, "grad_norm": 0.2414485216140747, "wall_ms": 70201}
{"step": 5700, "loss": 1.009322166442871, "loss_nce": 1.0090043544769287, "loss_mse": 0.003177710110321641, "lr": 0.0004987980024650023, "grad_norm": 0.22249476611614227, "wall_ms": 71441}
{"step": 5800, "loss": 1.044026255607605, "loss_nce": 1.0437041521072388, "loss_mse": 0.0032214284874498844, "lr": 0.0004987463536263036, "grad_norm": 0.2521997094154358, "wall_ms": 72679}
{"step": 5900, "loss": 1.0341277122497559, "loss_nce": 1.0338090658187866, "loss_mse": 0.003186404937878251, "lr": 0.0004986936211971391, "grad_norm": 0.26159149408340454, "wall_ms": 73916}
{"step": 6000, "loss": 0.9546343088150024, "loss_nce": 0.9543155431747437, "loss_mse": 0.003187555121257901, "lr": 0.0004986398054119342, "grad_norm": 0.23064903914928436, "wall_ms": 75190}
{"step": 6100, "loss": 0.9976834058761597, "loss_nce": 0.9973644018173218, "loss_mse": 0.0031898003071546555, "lr": 0.0004985849065099305, "grad_norm": 0.24653884768486023, "wall_ms": 76478}
{"step": 6200, "loss": 0.9257431030273438, "loss_nce": 0.925428032875061, "loss_mse": 0.0031508938409388065, "lr": 0.0004985289247351848, "grad_norm": 0.23785129189491272, "wall_ms": 77775}
{"step": 6300, "loss": 0.9011814594268799, "loss_nce": 0.900870680809021, "loss_mse": 0.0031075840815901756, "lr": 0.0004984718603365676, "grad_norm": 0.24450835585594177, "wall_ms": 79073}
{"step": 6400, "loss": 0.9005088210105896, "loss_nce": 0.9001935124397278, "loss_mse": 0.0031529939733445644, "lr": 0.0004984137135677626, "grad_norm": 0.2224418818950653, "wall_ms": 80351}
{"step": 6500, "loss": 0.9973072409629822, "loss_nce": 0.9969943165779114, "loss_mse": 0.0031293423380702734, "lr": 0.0004983544846872649, "grad_norm": 0.24815580248832703, "wall_ms": 81616}
{"step": 6600, "loss": 0.8044720888137817, "loss_nce": 0.8041670918464661, "loss_mse": 0.0030498134437948465, "lr": 0.0004982941739583807, "grad_norm": 0.21767379343509674, "wall_ms": 82881}
{"step": 6700, "loss": 0.8772755861282349, "loss_nce": 0.8769686222076416, "loss_mse": 0.0030697830952703953, "lr": 0.0004982327816492249, "grad_norm": 0.23948131501674652, "wall_ms": 84146}
{"step": 6800, "loss": 0.8055253624916077, "loss_nce": 0.8052217364311218, "loss_mse": 0.0030365497805178165, "lr": 0.0004981703080327214, "grad_norm": 0.21609704196453094, "wall_ms": 85404}
{"step": 6900, "loss": 0.8257563710212708, "loss_nce": 0.8254505395889282, "loss_mse": 0.0030583643820136786, "lr": 0.0004981067533866005, "grad_norm": 0.23461197316646576, "wall_ms": 86681}
{"step": 7000, "loss": 0.7964259386062622, "loss_nce": 0.7961169481277466, "loss_mse": 0.003089956007897854, "lr": 0.000498042117993399, "grad_norm": 0.22227118909358978, "wall_ms": 87974}
{"step": 7100, "loss": 0.7737321853637695, "loss_nce": 0.7734238505363464, "loss_mse": 0.003083285875618458, "lr": 0.0004979764021404572, "grad_norm": 0.225134015083313, "wall_ms": 89276}
{"step": 7200, "loss": 0.7956778407096863, "loss_nce": 0.7953689098358154, "loss_mse": 0.003089244943112135, "lr": 0.0004979096061199197, "grad_norm": 0.21948456764221191, "wall_ms": 90561}
{"step": 7300, "loss": 0.7811092734336853, "loss_nce": 0.7808046340942383, "loss_mse": 0.003046260681003332, "lr": 0.0004978417302287325, "grad_norm": 0.2303379327058792, "wall_ms": 91853}
{"step": 7400, "loss": 0.7713256478309631, "loss_nce": 0.7710293531417847, "loss_mse": 0.002962888218462467, "lr": 0.0004977727747686422, "grad_norm": 0.2297215312719345, "wall_ms": 93150}
{"step": 7500, "loss": 0.7673920392990112, "loss_nce": 0.7670943140983582, "loss_mse": 0.0029772124253213406, "lr": 0.000497702740046195, "grad_norm": 0.2295544296503067, "wall_ms": 94445}
{"step": 7600, "loss": 0.7577086091041565, "loss_nce": 0.7574087977409363, "loss_mse": 0.0029983771964907646, "lr": 0.0004976316263727349, "grad_norm": 0.21667669713497162, "wall_ms": 95722}
{"step": 7700, "loss": 0.7227962017059326, "loss_nce": 0.7224991321563721, "loss_mse": 0.002970881760120392, "lr": 0.0004975594340644023, "grad_norm": 0.22949357330799103, "wall_ms": 97004}
{"step": 7800, "loss": 0.7293169498443604, "loss_nce": 0.7290219068527222, "loss_mse": 0.0029505190905183554, "lr": 0.0004974861634421329, "grad_norm": 0.22247548401355743, "wall_ms": 98304}
{"step": 7900, "loss": 0.7276906371116638, "loss_nce": 0.7273894548416138, "loss_mse": 0.003011755645275116, "lr": 0.000497411814831656, "grad_norm": 0.21145837008953094, "wall_ms": 99600}
{"step": 8000, "loss": 0.7063608765602112, "loss_nce": 0.7060660123825073, "loss_mse": 0.0029485258273780346, "lr": 0.0004973363885634934, "grad_norm": 0.221265971660614, "wall_ms": 100912}
{"step": 8100, "loss": 0.6712040305137634, "loss_nce": 0.6709043383598328, "loss_mse": 0.00299714389257133, "lr": 0.0004972598849729574, "grad_norm": 0.20862732827663422, "wall_ms": 102209}
{"step": 8200, "loss": 0.691127598285675, "loss_nce": 0.6908332705497742, "loss_mse": 0.0029432778246700764, "lr": 0.0004971823044001499, "grad_norm": 0.20964361727237701, "wall_ms": 103508}
{"step": 8300, "loss": 0.7235506772994995, "loss_nce": 0.7232604026794434, "loss_mse": 0.002902708947658539, "lr": 0.0004971036471899603, "grad_norm": 0.20408104360103607, "wall_ms": 104769}
{"step": 8400, "loss": 0.685548722743988, "loss_nce": 0.6852551698684692, "loss_mse": 0.0029353785794228315, "lr": 0.0004970239136920645, "grad_norm": 0.22251681983470917, "wall_ms": 106088}
{"step": 8500, "loss": 0.6370495557785034, "loss_nce": 0.6367589235305786, "loss_mse": 0.002906386973336339, "lr": 0.0004969431042609229, "grad_norm": 0.20084114372730255, "wall_ms": 107373}
{"step": 8600, "loss": 0.6844003796577454, "loss_nce": 0.6841086149215698, "loss_mse": 0.002917794743552804, "lr": 0.0004968612192557794, "grad_norm": 0.21822428703308105, "wall_ms": 108620}
{"step": 8700, "loss": 0.6038598418235779, "loss_nce": 0.6035749912261963, "loss_mse": 0.0028487746603786945, "lr": 0.0004967782590406587, "grad_norm": 0.20702217519283295, "wall_ms": 109856}
{"step": 8800, "loss": 0.6243779063224792, "loss_nce": 0.6240858435630798, "loss_mse": 0.002920478116720915, "lr": 0.0004966942239843664, "grad_norm": 0.20710797607898712, "wall_ms": 111094}
{"step": 8900, "loss": 0.6413896679878235, "loss_nce": 0.6411042213439941, "loss_mse": 0.0028544017113745213, "lr": 0.0004966091144604858, "grad_norm": 0.20854730904102325, "wall_ms": 112331}
{"step": 9000, "loss": 0.6790083050727844, "loss_nce": 0.678713321685791, "loss_mse": 0.002949766581878066, "lr": 0.0004965229308473765, "grad_norm": 0.215653657913208, "wall_ms": 113568}
{"step": 9100, "loss": 0.5583648681640625, "loss_nce": 0.5580774545669556, "loss_mse": 0.002873961813747883, "lr": 0.000496435673528174, "grad_norm": 0.18850399553775787, "wall_ms": 114805}
{"step": 9200, "loss": 0.6187049746513367, "loss_nce": 0.6184172630310059, "loss_mse": 0.0028769546188414097, "lr": 0.000496347342890786, "grad_norm": 0.2203495055437088, "wall_ms": 116042}
{"step": 9300, "loss": 0.5890607237815857, "loss_nce": 0.5887725353240967, "loss_mse": 0.00288158911280334, "lr": 0.0004962579393278923, "grad_norm": 0.17919428646564484, "wall_ms": 117281}
{"step": 9400, "loss": 0.5896563529968262, "loss_nce": 0.5893700122833252, "loss_mse": 0.0028636627830564976, "lr": 0.0004961674632369425, "grad_norm": 0.2064957320690155, "wall_ms": 118521}
{"step": 9500, "loss": 0.561577320098877, "loss_nce": 0.5612909197807312, "loss_mse": 0.0028641975950449705, "lr": 0.0004960759150201537, "grad_norm": 0.19427372515201569, "wall_ms": 119758}
{"step": 9600, "loss": 0.5957357287406921, "loss_nce": 0.595448911190033, "loss_mse": 0.002868278883397579, "lr": 0.0004959832950845099, "grad_norm": 0.2097935527563095, "wall_ms": 120997}
{"step": 9700, "loss": 0.5428619384765625, "loss_nce": 0.5425812005996704, "loss_mse": 0.0028076451271772385, "lr": 0.0004958896038417587, "grad_norm": 0.19434940814971924, "wall_ms": 122237}
{"step": 9800, "loss": 0.504831075668335, "loss_nce": 0.5045487880706787, "loss_mse": 0.0028226026333868504, "lr": 0.000495794841708411, "grad_norm": 0.19763971865177155, "wall_ms": 123474}
{"step": 9900, "loss": 0.49812743067741394, "loss_nce": 0.4978458881378174, "loss_mse": 0.0028153080493211746, "lr": 0.0004956990091057381, "grad_norm": 0.17846353352069855, "wall_ms": 124712}
{"step": 10000, "loss": 0.601192057132721, "loss_nce": 0.6009118556976318, "loss_mse": 0.002802157774567604, "lr": 0.00049560210645977, "grad_norm": 0.21063122153282166, "wall_ms": 125950}
{"step": 10100, "loss": 0.5622888803482056, "loss_nce": 0.5620043873786926, "loss_mse": 0.0028450707904994488, "lr": 0.0004955041342012939, "grad_norm": 0.2107170969247818, "wall_ms": 139479}
{"step": 10200, "loss": 0.5058260560035706, "loss_nce": 0.5055373907089233, "loss_mse": 0.002886793576180935, "lr": 0.0004954050927658518, "grad_norm": 0.19726824760437012, "wall_ms": 140744}
{"step": 10300, "loss": 0.5462771058082581, "loss_nce": 0.5459959506988525, "loss_mse": 0.002811550861224532, "lr": 0.000495304982593739, "grad_norm": 0.20614461600780487, "wall_ms": 141993}
{"step": 10400, "loss": 0.5749295353889465, "loss_nce": 0.5746469497680664, "loss_mse": 0.0028257069643586874, "lr": 0.000495203804130002, "grad_norm": 0.2118476927280426, "wall_ms": 143246}
{"step": 10500, "loss": 0.4897611439228058, "loss_nce": 0.48947930335998535, "loss_mse": 0.002818277571350336, "lr": 0.0004951015578244361, "grad_norm": 0.19103330373764038, "wall_ms": 144502}
{"step": 10600, "loss": 0.4792027771472931, "loss_nce": 0.4789269268512726, "loss_mse": 0.00275838072411716, "lr": 0.0004949982441315841, "grad_norm": 0.17442630231380463, "wall_ms": 145759}
{"step": 10700, "loss": 0.5353379845619202, "loss_nce": 0.535058319568634, "loss_mse": 0.00279669975861907, "lr": 0.0004948938635107338, "grad_norm": 0.19385530054569244, "wall_ms": 147017}
{"step": 10800, "loss": 0.5180994272232056, "loss_nce": 0.5178233981132507, "loss_mse": 0.002760461298748851, "lr": 0.0004947884164259161, "grad_norm": 0.1856047660112381, "wall_ms": 148282}
{"step": 10900, "loss": 0.48012575507164, "loss_nce": 0.47985032200813293, "loss_mse": 0.002754458226263523, "lr": 0.000494681903345903, "grad_norm": 0.16931428015232086, "wall_ms": 149536}
{"step": 11000, "loss": 0.5446467995643616, "loss_nce": 0.5443692207336426, "loss_mse": 0.0027756444178521633, "lr": 0.0004945743247442054, "grad_norm": 0.2001744508743286, "wall_ms": 150783}
{"step": 11100, "loss": 0.46441513299942017, "loss_nce": 0.4641427993774414, "loss_mse": 0.002723215613514185, "lr": 0.000494465681099071, "grad_norm": 0.17426183819770813, "wall_ms": 152034}
{"step": 11200, "loss": 0.4897639751434326, "loss_nce": 0.4894905984401703, "loss_mse": 0.0027338278014212847, "lr": 0.0004943559728934825, "grad_norm": 0.18293166160583496, "wall_ms": 153277}
{"step": 11300, "loss": 0.5034466981887817, "loss_nce": 0.5031721591949463, "loss_mse": 0.0027454409282654524, "lr": 0.0004942452006151546, "grad_norm": 0.19339101016521454, "wall_ms": 154527}
{"step": 11400, "loss": 0.5317142009735107, "loss_nce": 0.5314353108406067, "loss_mse": 0.0027890829369425774, "lr": 0.000494133364756533, "grad_norm": 0.18850964307785034, "wall_ms": 155784}
{"step": 11500, "loss": 0.5172461867332458, "loss_nce": 0.5169718265533447, "loss_mse": 0.002743713092058897, "lr": 0.0004940204658147914, "grad_norm": 0.1730184555053711, "wall_ms": 157056}
{"step": 11600, "loss": 0.4490832984447479, "loss_nce": 0.4488074779510498, "loss_mse": 0.0027582035399973392, "lr": 0.0004939065042918293, "grad_norm": 0.18820162117481232, "wall_ms": 158312}
{"step": 11700, "loss": 0.5211197733879089, "loss_nce": 0.5208438038825989, "loss_mse": 0.0027597045991569757, "lr": 0.0004937914806942703, "grad_norm": 0.19280672073364258, "wall_ms": 159572}
{"step": 11800, "loss": 0.4633050262928009, "loss_nce": 0.4630295932292938, "loss_mse": 0.002754191169515252, "lr": 0.0004936753955334593, "grad_norm": 0.17986227571964264, "wall_ms": 160827}
{"step": 11900, "loss": 0.5157403945922852, "loss_nce": 0.5154649019241333, "loss_mse": 0.0027551702223718166, "lr": 0.0004935582493254606, "grad_norm": 0.18131983280181885, "wall_ms": 162094}
{"step": 12000, "loss": 0.49274781346321106, "loss_nce": 0.4924730360507965, "loss_mse": 0.002747884951531887, "lr": 0.0004934400425910553, "grad_norm": 0.18162202835083008, "wall_ms": 163362}
{"step": 12100, "loss": 0.4889158606529236, "loss_nce": 0.48864519596099854, "loss_mse": 0.0027066373731940985, "lr": 0.0004933207758557392, "grad_norm": 0.17929144203662872, "wall_ms": 164632}
{"step": 12200, "loss": 0.46702882647514343, "loss_nce": 0.4667600989341736, "loss_mse": 0.0026871724985539913, "lr": 0.0004932004496497205, "grad_norm": 0.1828034371137619, "wall_ms": 165912}
{"step": 12300, "loss": 0.4825487732887268, "loss_nce": 0.4822782278060913, "loss_mse": 0.0027053167577832937, "lr": 0.0004930790645079173, "grad_norm": 0.17184926569461823, "wall_ms": 167202}
{"step": 12400, "loss": 0.42699718475341797, "loss_nce": 0.42672812938690186, "loss_mse": 0.0026904758997261524, "lr": 0.0004929566209699553, "grad_norm": 0.1840667426586151, "wall_ms": 168469}
{"step": 12500, "loss": 0.4769386053085327, "loss_nce": 0.4766634404659271, "loss_mse": 0.0027516724076122046, "lr": 0.0004928331195801651, "grad_norm": 0.1798103153705597, "wall_ms": 169716}
{"step": 12600, "loss": 0.531720757484436, "loss_nce": 0.5314465165138245, "loss_mse": 0.0027425463777035475, "lr": 0.0004927085608875803, "grad_norm": 0.20756128430366516, "wall_ms": 170960}
{"step": 12700, "loss": 0.5000624060630798, "loss_nce": 0.49979227781295776, "loss_mse": 0.0027011956553906202, "lr": 0.0004925829454459351, "grad_norm": 0.18598538637161255, "wall_ms": 172200}
{"step": 12800, "loss": 0.40793484449386597, "loss_nce": 0.40766891837120056, "loss_mse": 0.0026593224611133337, "lr": 0.0004924562738136607, "grad_norm": 0.17773286998271942, "wall_ms": 173447}
{"step": 12900, "loss": 0.4153425097465515, "loss_nce": 0.4150744080543518, "loss_mse": 0.002681154292076826, "lr": 0.0004923285465538845, "grad_norm": 0.16669707000255585, "wall_ms": 174689}
{"step": 13000, "loss": 0.4626079201698303, "loss_nce": 0.46233588457107544, "loss_mse": 0.0027202365454286337, "lr": 0.0004921997642344264, "grad_norm": 0.17971155047416687, "wall_ms": 175933}
{"step": 13100, "loss": 0.39708077907562256, "loss_nce": 0.3968122601509094, "loss_mse": 0.002685306593775749, "lr": 0.0004920699274277967, "grad_norm": 0.15765151381492615, "wall_ms": 177201}
{"step": 13200, "loss": 0.39799362421035767, "loss_nce": 0.39772629737854004, "loss_mse": 0.0026732084807008505, "lr": 0.0004919390367111934, "grad_norm": 0.17944175004959106, "wall_ms": 178473}
{"step": 13300, "loss": 0.4041524827480316, "loss_nce": 0.40388548374176025, "loss_mse": 0.002669848967343569, "lr": 0.0004918070926664998, "grad_norm": 0.15559625625610352, "wall_ms": 179745}
{"step": 13400, "loss": 0.4519050121307373, "loss_nce": 0.4516305923461914, "loss_mse": 0.0027443142607808113, "lr": 0.0004916740958802819, "grad_norm": 0.18889760971069336, "wall_ms": 181014}
{"step": 13500, "loss": 0.4308672249317169, "loss_nce": 0.4305964708328247, "loss_mse": 0.002707600360736251, "lr": 0.0004915400469437857, "grad_norm": 0.17594248056411743, "wall_ms": 182261}
{"step": 13600, "loss": 0.41161948442459106, "loss_nce": 0.41134828329086304, "loss_mse": 0.0027120921295136213, "lr": 0.0004914049464529346, "grad_norm": 0.16464540362358093, "wall_ms": 183504}
{"step": 13700, "loss": 0.48415878415107727, "loss_nce": 0.4838809370994568, "loss_mse": 0.002778503578156233, "lr": 0.0004912687950083265, "grad_norm": 0.18654002249240875, "wall_ms": 184748}
{"step": 13800, "loss": 0.4070359170436859, "loss_nce": 0.40676793456077576, "loss_mse": 0.0026798490434885025, "lr": 0.0004911315932152318, "grad_norm": 0.16651766002178192, "wall_ms": 185995}
{"step": 13900, "loss": 0.43864956498146057, "loss_nce": 0.4383808374404907, "loss_mse": 0.002687406726181507, "lr": 0.0004909933416835901, "grad_norm": 0.18998582661151886, "wall_ms": 187245}
{"step": 14000, "loss": 0.42053914070129395, "loss_nce": 0.42027246952056885, "loss_mse": 0.0026665946934372187, "lr": 0.0004908540410280077, "grad_norm": 0.17731501162052155, "wall_ms": 188490}
{"step": 14100, "loss": 0.4300225079059601, "loss_nce": 0.4297597110271454, "loss_mse": 0.0026279930025339127, "lr": 0.0004907136918677548, "grad_norm": 0.17646026611328125, "wall_ms": 189733}
{"step": 14200, "loss": 0.4301082193851471, "loss_nce": 0.4298432171344757, "loss_mse": 0.0026500008534640074, "lr": 0.0004905722948267627, "grad_norm": 0.18177340924739838, "wall_ms": 190975}
{"step": 14300, "loss": 0.4381764233112335, "loss_nce": 0.43790939450263977, "loss_mse": 0.0026701430324465036, "lr": 0.0004904298505336214, "grad_norm": 0.19207064807415009, "wall_ms": 192216}
{"step": 14400, "loss": 0.4063374698162079, "loss_nce": 0.40607255697250366, "loss_mse": 0.002649220172315836, "lr": 0.0004902863596215765, "grad_norm": 0.1697869747877121, "wall_ms": 193457}
{"step": 14500, "loss": 0.43307921290397644, "loss_nce": 0.4328100085258484, "loss_mse": 0.0026921797543764114, "lr": 0.0004901418227285261, "grad_norm": 0.16079390048980713, "wall_ms": 194699}
{"step": 14600, "loss": 0.40937110781669617, "loss_nce": 0.4091108441352844, "loss_mse": 0.002602742053568363, "lr": 0.0004899962404970186, "grad_norm": 0.18169033527374268, "wall_ms": 195942}
{"step": 14700, "loss": 0.3712902069091797, "loss_nce": 0.3710258901119232, "loss_mse": 0.0026431656442582607, "lr": 0.0004898496135742492, "grad_norm": 0.16921654343605042, "wall_ms": 197186}
{"step": 14800, "loss": 0.3915945887565613, "loss_nce": 0.3913293480873108, "loss_mse": 0.0026524595450609922, "lr": 0.0004897019426120579, "grad_norm": 0.15874388813972473, "wall_ms": 198443}
{"step": 14900, "loss": 0.39306142926216125, "loss_nce": 0.39279425144195557, "loss_mse": 0.002671875525265932, "lr": 0.0004895532282669253, "grad_norm": 0.17556197941303253, "wall_ms": 199691}
{"step": 15000, "loss": 0.4184717535972595, "loss_nce": 0.41821134090423584, "loss_mse": 0.0026041208766400814, "lr": 0.000489403471199971, "grad_norm": 0.16536250710487366, "wall_ms": 200964}
{"step": 15100, "loss": 0.39542055130004883, "loss_nce": 0.39515459537506104, "loss_mse": 0.002659599529579282, "lr": 0.00048925267207695, "grad_norm": 0.17573001980781555, "wall_ms": 202207}
{"step": 15200, "loss": 0.358292818069458, "loss_nce": 0.3580363094806671, "loss_mse": 0.0025650763418525457, "lr": 0.0004891008315682495, "grad_norm": 0.150891974568367, "wall_ms": 203452}
{"step": 15300, "loss": 0.40231096744537354, "loss_nce": 0.40204671025276184, "loss_mse": 0.0026426701806485653, "lr": 0.0004889479503488867, "grad_norm": 0.17432530224323273, "wall_ms": 204710}
{"step": 15400, "loss": 0.4071579575538635, "loss_nce": 0.40689408779144287, "loss_mse": 0.002638649195432663, "lr": 0.0004887940290985049, "grad_norm": 0.16351182758808136, "wall_ms": 205964}
{"step": 15500, "loss": 0.37041640281677246, "loss_nce": 0.37015336751937866, "loss_mse": 0.0026304542552679777, "lr": 0.0004886390685013714, "grad_norm": 0.16592690348625183, "wall_ms": 207217}
{"step": 15600, "loss": 0.400317519903183, "loss_nce": 0.4000588059425354, "loss_mse": 0.002587096532806754, "lr": 0.0004884830692463736, "grad_norm": 0.18095195293426514, "wall_ms": 208474}
{"step": 15700, "loss": 0.3900415599346161, "loss_nce": 0.38977569341659546, "loss_mse": 0.0026587732136249542, "lr": 0.0004883260320270164, "grad_norm": 0.15556420385837555, "wall_ms": 209730}
{"step": 15800, "loss": 0.37411990761756897, "loss_nce": 0.3738577961921692, "loss_mse": 0.002621080493554473, "lr": 0.0004881679575414192, "grad_norm": 0.1536070555448532, "wall_ms": 210981}
{"step": 15900, "loss": 0.4231642782688141, "loss_nce": 0.42290282249450684, "loss_mse": 0.002614565659314394, "lr": 0.00048800884649231267, "grad_norm": 0.17213717103004456, "wall_ms": 212228}
{"step": 16000, "loss": 0.38367214798927307, "loss_nce": 0.3834104537963867, "loss_mse": 0.002616971032693982, "lr": 0.00048784869958703515, "grad_norm": 0.1804238110780716, "wall_ms": 213476}
{"step": 16100, "loss": 0.36247560381889343, "loss_nce": 0.3622198700904846, "loss_mse": 0.002557295374572277, "lr": 0.0004876875175375305, "grad_norm": 0.16559629142284393, "wall_ms": 214727}
{"step": 16200, "loss": 0.3613949418067932, "loss_nce": 0.36113208532333374, "loss_mse": 0.0026286349166184664, "lr": 0.0004875253010603439, "grad_norm": 0.16476662456989288, "wall_ms": 215978}
{"step": 16300, "loss": 0.3286890387535095, "loss_nce": 0.3284238874912262, "loss_mse": 0.0026514590717852116, "lr": 0.0004873620508766197, "grad_norm": 0.15629637241363525, "wall_ms": 217237}
{"step": 16400, "loss": 0.39453038573265076, "loss_nce": 0.3942721486091614, "loss_mse": 0.0025822354946285486, "lr": 0.00048719776771209705, "grad_norm": 0.1669316589832306, "wall_ms": 218500}
{"step": 16500, "loss": 0.33980974555015564, "loss_nce": 0.3395548462867737, "loss_mse": 0.002548987278714776, "lr": 0.0004870324522971077, "grad_norm": 0.14887605607509613, "wall_ms": 219765}
{"step": 16600, "loss": 0.3540475368499756, "loss_nce": 0.35378581285476685, "loss_mse": 0.0026173060759902, "lr": 0.00048686610536657226, "grad_norm": 0.15908418595790863, "wall_ms": 221020}
{"step": 16700, "loss": 0.36344149708747864, "loss_nce": 0.3631805181503296, "loss_mse": 0.0026099293027073145, "lr": 0.0004866987276599968, "grad_norm": 0.16599316895008087, "wall_ms": 222266}
{"step": 16800, "loss": 0.35173270106315613, "loss_nce": 0.3514697551727295, "loss_mse": 0.002629349008202553, "lr": 0.00048653031992147017, "grad_norm": 0.160098597407341, "wall_ms": 223514}
{"step": 16900, "loss": 0.37446728348731995, "loss_nce": 0.3742087483406067, "loss_mse": 0.002585216425359249, "lr": 0.0004863608828996599, "grad_norm": 0.16194254159927368, "wall_ms": 224770}
{"step": 17000, "loss": 0.35142579674720764, "loss_nce": 0.3511701226234436, "loss_mse": 0.0025566471740603447, "lr": 0.00048619041734780937, "grad_norm": 0.15312278270721436, "wall_ms": 226037}
{"step": 17100, "loss": 0.3727259039878845, "loss_nce": 0.372462660074234, "loss_mse": 0.0026323962956666946, "lr": 0.00048601892402373465, "grad_norm": 0.1582597941160202, "wall_ms": 227300}
{"step": 17200, "loss": 0.3444313108921051, "loss_nce": 0.3441714644432068, "loss_mse": 0.002598595106974244, "lr": 0.00048584640368982035, "grad_norm": 0.15440529584884644, "wall_ms": 228563}
{"step": 17300, "loss": 0.3956805169582367, "loss_nce": 0.3954178988933563, "loss_mse": 0.0026260402519255877, "lr": 0.00048567285711301715, "grad_norm": 0.1693306416273117, "wall_ms": 229829}
{"step": 17400, "loss": 0.33353957533836365, "loss_nce": 0.3332785964012146, "loss_mse": 0.002609668765217066, "lr": 0.00048549828506483773, "grad_norm": 0.14699958264827728, "wall_ms": 231098}
{"step": 17500, "loss": 0.3901306986808777, "loss_nce": 0.3898700475692749, "loss_mse": 0.0026066319551318884, "lr": 0.0004853226883213536, "grad_norm": 0.16285306215286255, "wall_ms": 232381}
{"step": 17600, "loss": 0.3790067732334137, "loss_nce": 0.37874430418014526, "loss_mse": 0.0026247159112244844, "lr": 0.0004851460676631916, "grad_norm": 0.16574032604694366, "wall_ms": 233642}
{"step": 17700, "loss": 0.3289877474308014, "loss_nce": 0.3287256956100464, "loss_mse": 0.0026205938775092363, "lr": 0.0004849684238755307, "grad_norm": 0.14729249477386475, "wall_ms": 234889}
{"step": 17800, "loss": 0.37749308347702026, "loss_nce": 0.3772287964820862, "loss_mse": 0.0026429048739373684, "lr": 0.0004847897577480978, "grad_norm": 0.16489464044570923, "wall_ms": 236135}
{"step": 17900, "loss": 0.3918735980987549, "loss_nce": 0.3916175365447998, "loss_mse": 0.002560603665187955, "lr": 0.00048461007007516514, "grad_norm": 0.1707463413476944, "wall_ms": 237374}
{"step": 18000, "loss": 0.3478044271469116, "loss_nce": 0.3475453853607178, "loss_mse": 0.0025904260110110044, "lr": 0.000484429361655546, "grad_norm": 0.14820747077465057, "wall_ms": 238612}
{"step": 18100, "loss": 0.3595327138900757, "loss_nce": 0.3592721223831177, "loss_mse": 0.002605787478387356, "lr": 0.00048424763329259153, "grad_norm": 0.1706157922744751, "wall_ms": 239852}
{"step": 18200, "loss": 0.3627466857433319, "loss_nce": 0.3624861240386963, "loss_mse": 0.002605628687888384, "lr": 0.0004840648857941872, "grad_norm": 0.1487606167793274, "wall_ms": 241090}
{"step": 18300, "loss": 0.38516682386398315, "loss_nce": 0.3849070072174072, "loss_mse": 0.002598315244540572, "lr": 0.00048388111997274905, "grad_norm": 0.17215250432491302, "wall_ms": 242330}
{"step": 18400, "loss": 0.3192217946052551, "loss_nce": 0.31896770000457764, "loss_mse": 0.0025408451911062002, "lr": 0.00048369633664522004, "grad_norm": 0.14672836661338806, "wall_ms": 243570}
{"step": 18500, "loss": 0.3584836721420288, "loss_nce": 0.35822349786758423, "loss_mse": 0.0026018652133643627, "lr": 0.0004835105366330668, "grad_norm": 0.16011029481887817, "wall_ms": 244816}
{"step": 18600, "loss": 0.3677709102630615, "loss_nce": 0.36751845479011536, "loss_mse": 0.002524568000808358, "lr": 0.00048332372076227535, "grad_norm": 0.16571937501430511, "wall_ms": 246061}
{"step": 18700, "loss": 0.32090574502944946, "loss_nce": 0.32065391540527344, "loss_mse": 0.0025182808749377728, "lr": 0.00048313588986334804, "grad_norm": 0.15420320630073547, "wall_ms": 247300}
{"step": 18800, "loss": 0.3409799635410309, "loss_nce": 0.3407208025455475, "loss_mse": 0.0025917317252606153, "lr": 0.00048294704477129947, "grad_norm": 0.16044197976589203, "wall_ms": 248542}
{"step": 18900, "loss": 0.3349510431289673, "loss_nce": 0.33469337224960327, "loss_mse": 0.0025767465122044086, "lr": 0.000482757186325653, "grad_norm": 0.1685633659362793, "wall_ms": 249785}
{"step": 19000, "loss": 0.33654430508613586, "loss_nce": 0.33628392219543457, "loss_mse": 0.002603805623948574, "lr": 0.00048256631537043666, "grad_norm": 0.14661584794521332, "wall_ms": 251026}
{"step": 19100, "loss": 0.34101301431655884, "loss_nce": 0.34075671434402466, "loss_mse": 0.0025631047319620848, "lr": 0.00048237443275418013, "grad_norm": 0.14307668805122375, "wall_ms": 252278}
{"step": 19200, "loss": 0.3408719599246979, "loss_nce": 0.34061241149902344, "loss_mse": 0.0025955243036150932, "lr": 0.00048218153932991, "grad_norm": 0.15741126239299774, "wall_ms": 253527}
{"step": 19300, "loss": 0.3302043378353119, "loss_nce": 0.3299447298049927, "loss_mse": 0.0025961538776755333, "lr": 0.0004819876359551467, "grad_norm": 0.17181169986724854, "wall_ms": 254777}
{"step": 19400, "loss": 0.3757760226726532, "loss_nce": 0.37552201747894287, "loss_mse": 0.0025400416925549507, "lr": 0.0004817927234919005, "grad_norm": 0.16781209409236908, "wall_ms": 256025}
{"step": 19500, "loss": 0.3341430723667145, "loss_nce": 0.33388763666152954, "loss_mse": 0.0025542662478983402, "lr": 0.0004815968028066677, "grad_norm": 0.1509665846824646, "wall_ms": 257288}
{"step": 19600, "loss": 0.3339541256427765, "loss_nce": 0.33369797468185425, "loss_mse": 0.0025614548940211535, "lr": 0.0004813998747704265, "grad_norm": 0.15743786096572876, "wall_ms": 258545}
{"step": 19700, "loss": 0.3184262216091156, "loss_nce": 0.3181706666946411, "loss_mse": 0.0025556024629622698, "lr": 0.0004812019402586333, "grad_norm": 0.1509505957365036, "wall_ms": 259806}
{"step": 19800, "loss": 0.33549991250038147, "loss_nce": 0.3352431654930115, "loss_mse": 0.002567413728684187, "lr": 0.0004810030001512193, "grad_norm": 0.15572410821914673, "wall_ms": 261073}
{"step": 19900, "loss": 0.3355323374271393, "loss_nce": 0.3352689743041992, "loss_mse": 0.0026335224974900484, "lr": 0.00048080305533258565, "grad_norm": 0.1550978273153305, "wall_ms": 262358}
{"step": 20000, "loss": 0.319572776556015, "loss_nce": 0.31931668519973755, "loss_mse": 0.0025609214790165424, "lr": 0.00048060210669160033, "grad_norm": 0.14005805552005768, "wall_ms": 263637}
{"step": 20100, "loss": 0.34015950560569763, "loss_nce": 0.33989715576171875, "loss_mse": 0.0026235308032482862, "lr": 0.00048040015512159365, "grad_norm": 0.1589469462633133, "wall_ms": 280907}
{"step": 20200, "loss": 0.3065737783908844, "loss_nce": 0.3063163459300995, "loss_mse": 0.00257437233813107, "lr": 0.0004801972015203547, "grad_norm": 0.1473190188407898, "wall_ms": 282182}
{"step": 20300, "loss": 0.3233494460582733, "loss_nce": 0.32309457659721375, "loss_mse": 0.002548557473346591, "lr": 0.000479993246790127, "grad_norm": 0.14609965682029724, "wall_ms": 283445}
{"step": 20400, "loss": 0.3403374254703522, "loss_nce": 0.3400765061378479, "loss_mse": 0.00260909553617239, "lr": 0.00047978829183760474, "grad_norm": 0.1488427370786667, "wall_ms": 284709}
{"step": 20500, "loss": 0.32767292857170105, "loss_nce": 0.32741931080818176, "loss_mse": 0.002536127343773842, "lr": 0.0004795823375739287, "grad_norm": 0.1551869660615921, "wall_ms": 285964}
{"step": 20600, "loss": 0.30661916732788086, "loss_nce": 0.3063637912273407, "loss_mse": 0.002553725615143776, "lr": 0.00047937538491468214, "grad_norm": 0.14933903515338898, "wall_ms": 287216}
{"step": 20700, "loss": 0.3115425109863281, "loss_nce": 0.3112867772579193, "loss_mse": 0.0025572585873305798, "lr": 0.00047916743477988664, "grad_norm": 0.1566336452960968, "wall_ms": 288458}
{"step": 20800, "loss": 0.3122884929180145, "loss_nce": 0.31203794479370117, "loss_mse": 0.00250540510751307, "lr": 0.0004789584880939984, "grad_norm": 0.14004439115524292, "wall_ms": 289703}
{"step": 20900, "loss": 0.3506487011909485, "loss_nce": 0.350393146276474, "loss_mse": 0.002555422019213438, "lr": 0.0004787485457859035, "grad_norm": 0.15553312003612518, "wall_ms": 290949}
{"step": 21000, "loss": 0.3256571292877197, "loss_nce": 0.32540035247802734, "loss_mse": 0.0025679029058665037, "lr": 0.0004785376087889143, "grad_norm": 0.16129733622074127, "wall_ms": 292196}
{"step": 21100, "loss": 0.29981961846351624, "loss_nce": 0.29956233501434326, "loss_mse": 0.0025729131884872913, "lr": 0.00047832567804076523, "grad_norm": 0.1469864696264267, "wall_ms": 293435}
{"step": 21200, "loss": 0.33916741609573364, "loss_nce": 0.3389071822166443, "loss_mse": 0.00260219001211226, "lr": 0.0004781127544836083, "grad_norm": 0.16889870166778564, "wall_ms": 294680}
{"step": 21300, "loss": 0.2806248664855957, "loss_nce": 0.2803703248500824, "loss_mse": 0.0025453960988670588, "lr": 0.0004778988390640092, "grad_norm": 0.14794287085533142, "wall_ms": 295925}
{"step": 21400, "loss": 0.32488343119621277, "loss_nce": 0.32462650537490845, "loss_mse": 0.002569146454334259, "lr": 0.00047768393273294303, "grad_norm": 0.16254432499408722, "wall_ms": 297161}
{"step": 21500, "loss": 0.3588111698627472, "loss_nce": 0.3585525155067444, "loss_mse": 0.002586635760962963, "lr": 0.00047746803644579006, "grad_norm": 0.14922881126403809, "wall_ms": 298399}
{"step": 21600, "loss": 0.3214970827102661, "loss_nce": 0.32124173641204834, "loss_mse": 0.002553534461185336, "lr": 0.00047725115116233145, "grad_norm": 0.1576545685529709, "wall_ms": 299664}
{"step": 21700, "loss": 0.33582955598831177, "loss_nce": 0.33557650446891785, "loss_mse": 0.0025304362643510103, "lr": 0.00047703327784674493, "grad_norm": 0.15771649777889252, "wall_ms": 300912}
{"step": 21800, "loss": 0.28143787384033203, "loss_nce": 0.2811911702156067, "loss_mse": 0.0024670306593179703, "lr": 0.00047681441746760074, "grad_norm": 0.1368851661682129, "wall_ms": 302155}
{"step": 21900, "loss": 0.2999701201915741, "loss_nce": 0.2997143566608429, "loss_mse": 0.002557644620537758, "lr": 0.0004765945709978571, "grad_norm": 0.14533217251300812, "wall_ms": 303397}
{"step": 22000, "loss": 0.3476794362068176, "loss_nce": 0.34742408990859985, "loss_mse": 0.002553535159677267, "lr": 0.0004763737394148561, "grad_norm": 0.16704323887825012, "wall_ms": 304659}
{"step": 22100, "loss": 0.3175770342350006, "loss_nce": 0.31732064485549927, "loss_mse": 0.0025637492071837187, "lr": 0.0004761519237003188, "grad_norm": 0.16066226363182068, "wall_ms": 305911}
{"step": 22200, "loss": 0.34921762347221375, "loss_nce": 0.34896478056907654, "loss_mse": 0.0025285223964601755, "lr": 0.0004759291248403418, "grad_norm": 0.1513625979423523, "wall_ms": 307167}
{"step": 22300, "loss": 0.2952629625797272, "loss_nce": 0.2950047552585602, "loss_mse": 0.002582005923613906, "lr": 0.000475705343825392, "grad_norm": 0.14312317967414856, "wall_ms": 308422}
{"step": 22400, "loss": 0.28197339177131653, "loss_nce": 0.28171777725219727, "loss_mse": 0.0025560460053384304, "lr": 0.00047548058165030265, "grad_norm": 0.13269412517547607, "wall_ms": 309677}
{"step": 22500, "loss": 0.34976571798324585, "loss_nce": 0.3495121896266937, "loss_mse": 0.0025353822857141495, "lr": 0.0004752548393142688, "grad_norm": 0.17065304517745972, "wall_ms": 310919}
{"step": 22600, "loss": 0.31488141417503357, "loss_nce": 0.3146243095397949, "loss_mse": 0.002570957411080599, "lr": 0.0004750281178208429, "grad_norm": 0.1608382612466812, "wall_ms": 312158}
{"step": 22700, "loss": 0.28974729776382446, "loss_nce": 0.28949809074401855, "loss_mse": 0.002492035971954465, "lr": 0.0004748004181779303, "grad_norm": 0.14362768828868866, "wall_ms": 313399}
{"step": 22800, "loss": 0.2975268065929413, "loss_nce": 0.29726308584213257, "loss_mse": 0.0026372529100626707, "lr": 0.00047457174139778466, "grad_norm": 0.15766093134880066, "wall_ms": 314644}
{"step": 22900, "loss": 0.2945246398448944, "loss_nce": 0.29426950216293335, "loss_mse": 0.0025512301363050938, "lr": 0.00047434208849700366, "grad_norm": 0.14958779513835907, "wall_ms": 315883}
{"step": 23000, "loss": 0.31476110219955444, "loss_nce": 0.3145090937614441, "loss_mse": 0.002520086709409952, "lr": 0.0004741114604965246, "grad_norm": 0.14968779683113098, "wall_ms": 317122}
{"step": 23100, "loss": 0.3020475506782532, "loss_nce": 0.30179646611213684, "loss_mse": 0.00251083099283278, "lr": 0.00047387985842161924, "grad_norm": 0.1506219208240509, "wall_ms": 318374}
{"step": 23200, "loss": 0.2982136011123657, "loss_nce": 0.2979557514190674, "loss_mse": 0.0025786201003938913, "lr": 0.00047364728330188987, "grad_norm": 0.14112620055675507, "wall_ms": 319628}
{"step": 23300, "loss": 0.2730824053287506, "loss_nce": 0.27282723784446716, "loss_mse": 0.0025518054608255625, "lr": 0.0004734137361712646, "grad_norm": 0.1574234813451767, "wall_ms": 320892}
{"step": 23400, "loss": 0.3247431516647339, "loss_nce": 0.3244849443435669, "loss_mse": 0.00258220499381423, "lr": 0.00047317921806799254, "grad_norm": 0.15837208926677704, "wall_ms": 322143}
{"step": 23500, "loss": 0.28831905126571655, "loss_nce": 0.2880721390247345, "loss_mse": 0.002469110768288374, "lr": 0.00047294373003463927, "grad_norm": 0.12948785722255707, "wall_ms": 323754}
{"step": 23600, "loss": 0.272556871175766, "loss_nce": 0.27230304479599, "loss_mse": 0.002538177650421858, "lr": 0.0004727072731180824, "grad_norm": 0.13220840692520142, "wall_ms": 325011}
{"step": 23700, "loss": 0.2595420479774475, "loss_nce": 0.25929176807403564, "loss_mse": 0.002502747345715761, "lr": 0.00047246984836950674, "grad_norm": 0.13741588592529297, "wall_ms": 326259}
{"step": 23800, "loss": 0.26836317777633667, "loss_nce": 0.26811686158180237, "loss_mse": 0.0024632199201732874, "lr": 0.00047223145684439975, "grad_norm": 0.14846615493297577, "wall_ms": 327516}
{"step": 23900, "loss": 0.2880904972553253, "loss_nce": 0.2878350019454956, "loss_mse": 0.0025549328420311213, "lr": 0.00047199209960254644, "grad_norm": 0.1369399130344391, "wall_ms": 328807}
{"step": 24000, "loss": 0.2995162308216095, "loss_nce": 0.29926547408103943, "loss_mse": 0.0025076125748455524, "lr": 0.0004717517777080255, "grad_norm": 0.14208927750587463, "wall_ms": 330106}
{"step": 24100, "loss": 0.2759130895137787, "loss_nce": 0.2756579518318176, "loss_mse": 0.002551444573327899, "lr": 0.0004715104922292034, "grad_norm": 0.13605628907680511, "wall_ms": 331371}
{"step": 24200, "loss": 0.29742395877838135, "loss_nce": 0.29716670513153076, "loss_mse": 0.002572481520473957, "lr": 0.0004712682442387307, "grad_norm": 0.12641996145248413, "wall_ms": 332618}
{"step": 24300, "loss": 0.2943750321865082, "loss_nce": 0.2941287159919739, "loss_mse": 0.0024631116539239883, "lr": 0.0004710250348135369, "grad_norm": 0.1396215707063675, "wall_ms": 333868}
{"step": 24400, "loss": 0.2757183909416199, "loss_nce": 0.275463342666626, "loss_mse": 0.002550597535446286, "lr": 0.0004707808650348256, "grad_norm": 0.13870902359485626, "wall_ms": 335111}
{"step": 24500, "loss": 0.33499598503112793, "loss_nce": 0.33474305272102356, "loss_mse": 0.0025292891077697277, "lr": 0.0004705357359880694, "grad_norm": 0.1567569226026535, "wall_ms": 336354}
{"step": 24600, "loss": 0.27132949233055115, "loss_nce": 0.27107536792755127, "loss_mse": 0.0025412787217646837, "lr": 0.00047028964876300565, "grad_norm": 0.13002675771713257, "wall_ms": 337596}
{"step": 24700, "loss": 0.2759617567062378, "loss_nce": 0.2757112383842468, "loss_mse": 0.002505069598555565, "lr": 0.0004700426044536315, "grad_norm": 0.1357131153345108, "wall_ms": 338842}
{"step": 24800, "loss": 0.29161667823791504, "loss_nce": 0.2913641333580017, "loss_mse": 0.00252533215098083, "lr": 0.0004697946041581985, "grad_norm": 0.1454639732837677, "wall_ms": 340087}
{"step": 24900, "loss": 0.28954267501831055, "loss_nce": 0.28929442167282104, "loss_mse": 0.00248250481672585, "lr": 0.00046954564897920846, "grad_norm": 0.1295367181301117, "wall_ms": 341330}
{"step": 25000, "loss": 0.28402140736579895, "loss_nce": 0.2837710976600647, "loss_mse": 0.002503006486222148, "lr": 0.0004692957400234079, "grad_norm": 0.13783575594425201, "wall_ms": 342575}
{"step": 25100, "loss": 0.2962953448295593, "loss_nce": 0.29604122042655945, "loss_mse": 0.0025411597453057766, "lr": 0.0004690448784017836, "grad_norm": 0.14998891949653625, "wall_ms": 343820}
{"step": 25200, "loss": 0.2706693708896637, "loss_nce": 0.27041518688201904, "loss_mse": 0.0025418431032449007, "lr": 0.0004687930652295574, "grad_norm": 0.13123995065689087, "wall_ms": 345064}
{"step": 25300, "loss": 0.27790045738220215, "loss_nce": 0.2776471972465515, "loss_mse": 0.0025327245239168406, "lr": 0.00046854030162618126, "grad_norm": 0.1301887184381485, "wall_ms": 346308}
{"step": 25400, "loss": 0.2716759741306305, "loss_nce": 0.27142661809921265, "loss_mse": 0.0024934676475822926, "lr": 0.0004682865887153326, "grad_norm": 0.1373104602098465, "wall_ms": 347550}
{"step": 25500, "loss": 0.33030205965042114, "loss_nce": 0.33004647493362427, "loss_mse": 0.002555826911702752, "lr": 0.0004680319276249087, "grad_norm": 0.13737216591835022, "wall_ms": 348793}
{"step": 25600, "loss": 0.2785640358924866, "loss_nce": 0.2783098816871643, "loss_mse": 0.0025414780247956514, "lr": 0.00046777631948702233, "grad_norm": 0.14200246334075928, "wall_ms": 350037}
{"step": 25700, "loss": 0.258714497089386, "loss_nce": 0.2584644556045532, "loss_mse": 0.0025004472117871046, "lr": 0.00046751976543799625, "grad_norm": 0.13464194536209106, "wall_ms": 351275}
{"step": 25800, "loss": 0.27193543314933777, "loss_nce": 0.2716798186302185, "loss_mse": 0.002556148450821638, "lr": 0.0004672622666183583, "grad_norm": 0.13643185794353485, "wall_ms": 352514}
{"step": 25900, "loss": 0.2754662334918976, "loss_nce": 0.275212824344635, "loss_mse": 0.002534155733883381, "lr": 0.00046700382417283655, "grad_norm": 0.1362156718969345, "wall_ms": 353758}
{"step": 26000, "loss": 0.32322433590888977, "loss_nce": 0.32297074794769287, "loss_mse": 0.002535782288759947, "lr": 0.00046674443925035387, "grad_norm": 0.15312638878822327, "wall_ms": 355003}
{"step": 26100, "loss": 0.2726978659629822, "loss_nce": 0.27244406938552856, "loss_mse": 0.002538057044148445, "lr": 0.00046648411300402305, "grad_norm": 0.13686563074588776, "wall_ms": 356247}
{"step": 26200, "loss": 0.2627163827419281, "loss_nce": 0.2624662518501282, "loss_mse": 0.002501380629837513, "lr": 0.00046622284659114156, "grad_norm": 0.14763376116752625, "wall_ms": 357494}
{"step": 26300, "loss": 0.29456841945648193, "loss_nce": 0.2943161129951477, "loss_mse": 0.0025231624022126198, "lr": 0.00046596064117318634, "grad_norm": 0.13811573386192322, "wall_ms": 358741}
{"step": 26400, "loss": 0.2982479929924011, "loss_nce": 0.2979907691478729, "loss_mse": 0.0025720945559442043, "lr": 0.00046569749791580895, "grad_norm": 0.14140264689922333, "wall_ms": 359993}
{"step": 26500, "loss": 0.2739611268043518, "loss_nce": 0.27371272444725037, "loss_mse": 0.002483922988176346, "lr": 0.00046543341798883003, "grad_norm": 0.12974043190479279, "wall_ms": 361250}
{"step": 26600, "loss": 0.25466635823249817, "loss_nce": 0.2544168531894684, "loss_mse": 0.0024951468221843243, "lr": 0.0004651684025662343, "grad_norm": 0.12136510759592056, "wall_ms": 362505}
{"step": 26700, "loss": 0.283005952835083, "loss_nce": 0.2827513515949249, "loss_mse": 0.002545942086726427, "lr": 0.0004649024528261653, "grad_norm": 0.13901551067829132, "wall_ms": 363745}
{"step": 26800, "loss": 0.27152636647224426, "loss_nce": 0.2712703347206116, "loss_mse": 0.0025604397524148226, "lr": 0.00046463556995092, "grad_norm": 0.12515045702457428, "wall_ms": 364995}
{"step": 26900, "loss": 0.26262688636779785, "loss_nce": 0.26237303018569946, "loss_mse": 0.0025386675260961056, "lr": 0.00046436775512694387, "grad_norm": 0.13298341631889343, "wall_ms": 366240}
{"step": 27000, "loss": 0.28667008876800537, "loss_nce": 0.2864195704460144, "loss_mse": 0.002505039796233177, "lr": 0.00046409900954482537, "grad_norm": 0.1408763974905014, "wall_ms": 367496}
{"step": 27100, "loss": 0.29924580454826355, "loss_nce": 0.29899269342422485, "loss_mse": 0.0025311713106930256, "lr": 0.0004638293343992907, "grad_norm": 0.13915148377418518, "wall_ms": 368766}
{"step": 27200, "loss": 0.31839680671691895, "loss_nce": 0.3181423246860504, "loss_mse": 0.0025446887593716383, "lr": 0.0004635587308891984, "grad_norm": 0.14760416746139526, "wall_ms": 370037}
{"step": 27300, "loss": 0.2916516065597534, "loss_nce": 0.29139894247055054, "loss_mse": 0.002526710042729974, "lr": 0.0004632872002175342, "grad_norm": 0.1617022007703781, "wall_ms": 371319}
{"step": 27400, "loss": 0.2692497670650482, "loss_nce": 0.26900166273117065, "loss_mse": 0.0024810009635984898, "lr": 0.00046301474359140566, "grad_norm": 0.14043019711971283, "wall_ms": 372596}
{"step": 27500, "loss": 0.28032219409942627, "loss_nce": 0.2800702154636383, "loss_mse": 0.0025196520145982504, "lr": 0.00046274136222203657, "grad_norm": 0.13554507493972778, "wall_ms": 373854}
{"step": 27600, "loss": 0.3140731155872345, "loss_nce": 0.3138188123703003, "loss_mse": 0.002543154638260603, "lr": 0.00046246705732476184, "grad_norm": 0.14889466762542725, "wall_ms": 375102}
{"step": 27700, "loss": 0.28495630621910095, "loss_nce": 0.28470689058303833, "loss_mse": 0.002494069514796138, "lr": 0.00046219183011902195, "grad_norm": 0.14300841093063354, "wall_ms": 376347}
{"step": 27800, "loss": 0.3068263530731201, "loss_nce": 0.3065701127052307, "loss_mse": 0.0025623980909585953, "lr": 0.0004619156818283576, "grad_norm": 0.14385557174682617, "wall_ms": 377585}
{"step": 27900, "loss": 0.2525825500488281, "loss_nce": 0.2523302137851715, "loss_mse": 0.0025233523920178413, "lr": 0.00046163861368040424, "grad_norm": 0.13502137362957, "wall_ms": 378827}
{"step": 28000, "loss": 0.3117888569831848, "loss_nce": 0.31153881549835205, "loss_mse": 0.0025003142654895782, "lr": 0.00046136062690688633, "grad_norm": 0.15716740489006042, "wall_ms": 380080}
{"step": 28100, "loss": 0.2757123112678528, "loss_nce": 0.2754603624343872, "loss_mse": 0.0025193875189870596, "lr": 0.0004610817227436127, "grad_norm": 0.14438602328300476, "wall_ms": 381354}
{"step": 28200, "loss": 0.2693006992340088, "loss_nce": 0.26904821395874023, "loss_mse": 0.0025248455349355936, "lr": 0.0004608019024304699, "grad_norm": 0.13444015383720398, "wall_ms": 382624}
{"step": 28300, "loss": 0.23750703036785126, "loss_nce": 0.23725080490112305, "loss_mse": 0.0025622276589274406, "lr": 0.00046052116721141787, "grad_norm": 0.13671362400054932, "wall_ms": 383900}
{"step": 28400, "loss": 0.26054397225379944, "loss_nce": 0.26029568910598755, "loss_mse": 0.0024827918969094753, "lr": 0.0004602395183344831, "grad_norm": 0.1343671679496765, "wall_ms": 385171}
{"step": 28500, "loss": 0.2645616829395294, "loss_nce": 0.2643154263496399, "loss_mse": 0.0024624906945973635, "lr": 0.0004599569570517546, "grad_norm": 0.13162656128406525, "wall_ms": 386427}
{"step": 28600, "loss": 0.2827874720096588, "loss_nce": 0.28253576159477234, "loss_mse": 0.0025172107852995396, "lr": 0.000459673484619377, "grad_norm": 0.1465512216091156, "wall_ms": 387683}
{"step": 28700, "loss": 0.2675083577632904, "loss_nce": 0.2672598958015442, "loss_mse": 0.0024844929575920105, "lr": 0.00045938910229754553, "grad_norm": 0.1476411372423172, "wall_ms": 388946}
{"step": 28800, "loss": 0.24208375811576843, "loss_nce": 0.24183389544487, "loss_mse": 0.0024986558128148317, "lr": 0.00045910381135050077, "grad_norm": 0.123293936252594, "wall_ms": 390212}
{"step": 28900, "loss": 0.29206931591033936, "loss_nce": 0.29181575775146484, "loss_mse": 0.0025356761179864407, "lr": 0.00045881761304652233, "grad_norm": 0.15230178833007812, "wall_ms": 391481}
{"step": 29000, "loss": 0.27693238854408264, "loss_nce": 0.27668097615242004, "loss_mse": 0.002514057792723179, "lr": 0.00045853050865792363, "grad_norm": 0.1375616192817688, "wall_ms": 392752}
{"step": 29100, "loss": 0.26413530111312866, "loss_nce": 0.2638835906982422, "loss_mse": 0.0025170985609292984, "lr": 0.000458242499461046, "grad_norm": 0.13447707891464233, "wall_ms": 394020}
{"step": 29200, "loss": 0.2622791826725006, "loss_nce": 0.26202893257141113, "loss_mse": 0.002502602757886052, "lr": 0.0004579535867362534, "grad_norm": 0.13112463057041168, "wall_ms": 395281}
{"step": 29300, "loss": 0.2527391016483307, "loss_nce": 0.2524924874305725, "loss_mse": 0.0024660928174853325, "lr": 0.00045766377176792634, "grad_norm": 0.151163712143898, "wall_ms": 396547}
{"step": 29400, "loss": 0.3022598624229431, "loss_nce": 0.302004337310791, "loss_mse": 0.002555391751229763, "lr": 0.0004573730558444565, "grad_norm": 0.148587167263031, "wall_ms": 397811}
{"step": 29500, "loss": 0.2649521231651306, "loss_nce": 0.2647058963775635, "loss_mse": 0.0024623440112918615, "lr": 0.0004570814402582404, "grad_norm": 0.1329886019229889, "wall_ms": 399082}
{"step": 29600, "loss": 0.2808622419834137, "loss_nce": 0.28061074018478394, "loss_mse": 0.002514925552532077, "lr": 0.00045678892630567454, "grad_norm": 0.14299103617668152, "wall_ms": 400340}
{"step": 29700, "loss": 0.27077075839042664, "loss_nce": 0.27052041888237, "loss_mse": 0.002503487514331937, "lr": 0.00045649551528714885, "grad_norm": 0.1314534842967987, "wall_ms": 401596}
{"step": 29800, "loss": 0.27555614709854126, "loss_nce": 0.2753041386604309, "loss_mse": 0.002520100912079215, "lr": 0.0004562012085070413, "grad_norm": 0.1385328322649002, "wall_ms": 402867}
{"step": 29900, "loss": 0.2909488081932068, "loss_nce": 0.2906939685344696, "loss_mse": 0.0025483446661382914, "lr": 0.00045590600727371224, "grad_norm": 0.1525048464536667, "wall_ms": 404139}
{"step": 30000, "loss": 0.30098992586135864, "loss_nce": 0.3007367253303528, "loss_mse": 0.00253200251609087, "lr": 0.00045560991289949807, "grad_norm": 0.14773358404636383, "wall_ms": 405404}
{"step": 30100, "loss": 0.27422961592674255, "loss_nce": 0.2739796042442322, "loss_mse": 0.002500237198546529, "lr": 0.00045531292670070574, "grad_norm": 0.13742269575595856, "wall_ms": 423180}
{"step": 30200, "loss": 0.28133320808410645, "loss_nce": 0.281083881855011, "loss_mse": 0.0024931577499955893, "lr": 0.00045501504999760715, "grad_norm": 0.13353753089904785, "wall_ms": 424440}
{"step": 30300, "loss": 0.29292070865631104, "loss_nce": 0.2926674783229828, "loss_mse": 0.0025321978610008955, "lr": 0.0004547162841144326, "grad_norm": 0.14519137144088745, "wall_ms": 425698}
{"step": 30400, "loss": 0.2680718004703522, "loss_nce": 0.267816960811615, "loss_mse": 0.002548545366153121, "lr": 0.0004544166303793657, "grad_norm": 0.12696373462677002, "wall_ms": 426956}
{"step": 30500, "loss": 0.23165017366409302, "loss_nce": 0.23140272498130798, "loss_mse": 0.0024744956754148006, "lr": 0.0004541160901245367, "grad_norm": 0.12075475603342056, "wall_ms": 428196}
{"step": 30600, "loss": 0.2894055247306824, "loss_nce": 0.28915584087371826, "loss_mse": 0.0024967407807707787, "lr": 0.0004538146646860172, "grad_norm": 0.133382186293602, "wall_ms": 429435}
{"step": 30700, "loss": 0.29622551798820496, "loss_nce": 0.2959757447242737, "loss_mse": 0.0024977647699415684, "lr": 0.00045351235540381363, "grad_norm": 0.15392087399959564, "wall_ms": 430673}
{"step": 30800, "loss": 0.26264122128486633, "loss_nce": 0.2623867392539978, "loss_mse": 0.002544943941757083, "lr": 0.00045320916362186214, "grad_norm": 0.14296986162662506, "wall_ms": 431909}
{"step": 30900, "loss": 0.28651514649391174, "loss_nce": 0.28625673055648804, "loss_mse": 0.002584078349173069, "lr": 0.0004529050906880216, "grad_norm": 0.1388600617647171, "wall_ms": 433147}
{"step": 31000, "loss": 0.28229203820228577, "loss_nce": 0.2820409834384918, "loss_mse": 0.002510402351617813, "lr": 0.00045260013795406814, "grad_norm": 0.13250043988227844, "wall_ms": 434390}
{"step": 31100, "loss": 0.30693531036376953, "loss_nce": 0.30667632818222046, "loss_mse": 0.0025899072643369436, "lr": 0.0004522943067756894, "grad_norm": 0.15050575137138367, "wall_ms": 435633}
{"step": 31200, "loss": 0.3154345154762268, "loss_nce": 0.3151806592941284, "loss_mse": 0.0025385194458067417, "lr": 0.0004519875985124779, "grad_norm": 0.1554083675146103, "wall_ms": 436881}
{"step": 31300, "loss": 0.2660757899284363, "loss_nce": 0.265823096036911, "loss_mse": 0.0025268681347370148, "lr": 0.0004516800145279255, "grad_norm": 0.14030997455120087, "wall_ms": 438129}
{"step": 31400, "loss": 0.28580912947654724, "loss_nce": 0.28555333614349365, "loss_mse": 0.002557998988777399, "lr": 0.00045137155618941706, "grad_norm": 0.1360362470149994, "wall_ms": 439376}
{"step": 31500, "loss": 0.2870893180370331, "loss_nce": 0.2868366241455078, "loss_mse": 0.0025268595200031996, "lr": 0.0004510622248682244, "grad_norm": 0.13621625304222107, "wall_ms": 440622}
{"step": 31600, "loss": 0.2613238990306854, "loss_nce": 0.26107048988342285, "loss_mse": 0.0025341722648590803, "lr": 0.00045075202193950025, "grad_norm": 0.13245736062526703, "wall_ms": 441870}
{"step": 31700, "loss": 0.23572969436645508, "loss_nce": 0.23547467589378357, "loss_mse": 0.0025501262862235308, "lr": 0.0004504409487822723, "grad_norm": 0.1272680163383484, "wall_ms": 443121}
{"step": 31800, "loss": 0.23145851492881775, "loss_nce": 0.23121152818202972, "loss_mse": 0.002469876082614064, "lr": 0.00045012900677943645, "grad_norm": 0.12186974287033081, "wall_ms": 444380}
{"step": 31900, "loss": 0.25248631834983826, "loss_nce": 0.25223830342292786, "loss_mse": 0.0024801003746688366, "lr": 0.00044981619731775173, "grad_norm": 0.1319860965013504, "wall_ms": 445631}
{"step": 32000, "loss": 0.27897679805755615, "loss_nce": 0.2787213921546936, "loss_mse": 0.0025539242196828127, "lr": 0.00044950252178783316, "grad_norm": 0.13910536468029022, "wall_ms": 446876}
{"step": 32100, "loss": 0.26098382472991943, "loss_nce": 0.26073306798934937, "loss_mse": 0.002507491735741496, "lr": 0.0004491879815841458, "grad_norm": 0.12863010168075562, "wall_ms": 448116}
{"step": 32200, "loss": 0.27686285972595215, "loss_nce": 0.2766086161136627, "loss_mse": 0.0025425185449421406, "lr": 0.00044887257810499894, "grad_norm": 0.15183040499687195, "wall_ms": 449363}
{"step": 32300, "loss": 0.28255927562713623, "loss_nce": 0.2823109030723572, "loss_mse": 0.002483685966581106, "lr": 0.00044855631275253954, "grad_norm": 0.1316392719745636, "wall_ms": 450619}
{"step": 32400, "loss": 0.27322542667388916, "loss_nce": 0.2729743421077728, "loss_mse": 0.0025108850095421076, "lr": 0.000448239186932746, "grad_norm": 0.14551997184753418, "wall_ms": 451870}
{"step": 32500, "loss": 0.25442737340927124, "loss_nce": 0.25417661666870117, "loss_mse": 0.002507453318685293, "lr": 0.000447921202055422, "grad_norm": 0.15570992231369019, "wall_ms": 453116}
{"step": 32600, "loss": 0.24820567667484283, "loss_nce": 0.24795734882354736, "loss_mse": 0.0024833499919623137, "lr": 0.0004476023595341903, "grad_norm": 0.12534552812576294, "wall_ms": 454365}
{"step": 32700, "loss": 0.26764532923698425, "loss_nce": 0.26739394664764404, "loss_mse": 0.0025137318298220634, "lr": 0.00044728266078648636, "grad_norm": 0.13771697878837585, "wall_ms": 455621}
{"step": 32800, "loss": 0.2083246111869812, "loss_nce": 0.2080753743648529, "loss_mse": 0.0024923202581703663, "lr": 0.00044696210723355203, "grad_norm": 0.12057893723249435, "wall_ms": 456887}
{"step": 32900, "loss": 0.29127344489097595, "loss_nce": 0.29101648926734924, "loss_mse": 0.002569430274888873, "lr": 0.00044664070030042913, "grad_norm": 0.1437286138534546, "wall_ms": 458150}
{"step": 33000, "loss": 0.2695710062980652, "loss_nce": 0.269315242767334, "loss_mse": 0.0025577815249562263, "lr": 0.00044631844141595334, "grad_norm": 0.14274238049983978, "wall_ms": 459420}
{"step": 33100, "loss": 0.2856757342815399, "loss_nce": 0.2854230999946594, "loss_mse": 0.0025262825656682253, "lr": 0.0004459953320127479, "grad_norm": 0.14129476249217987, "wall_ms": 460694}
{"step": 33200, "loss": 0.25493863224983215, "loss_nce": 0.2546851634979248, "loss_mse": 0.002534752245992422, "lr": 0.00044567137352721683, "grad_norm": 0.13715007901191711, "wall_ms": 461971}
{"step": 33300, "loss": 0.24477912485599518, "loss_nce": 0.24453173577785492, "loss_mse": 0.0024738276842981577, "lr": 0.0004453465673995389, "grad_norm": 0.12701661884784698, "wall_ms": 463232}
{"step": 33400, "loss": 0.2898089289665222, "loss_nce": 0.2895587682723999, "loss_mse": 0.0025016877334564924, "lr": 0.0004450209150736613, "grad_norm": 0.1292915791273117, "wall_ms": 464489}
{"step": 33500, "loss": 0.28327620029449463, "loss_nce": 0.2830212712287903, "loss_mse": 0.002549312077462673, "lr": 0.0004446944179972926, "grad_norm": 0.1341703087091446, "wall_ms": 465781}
{"step": 33600, "loss": 0.2857072353363037, "loss_nce": 0.285452663898468, "loss_mse": 0.002545589581131935, "lr": 0.0004443670776218973, "grad_norm": 0.14102081954479218, "wall_ms": 467077}
{"step": 33700, "loss": 0.26799362897872925, "loss_nce": 0.2677391767501831, "loss_mse": 0.0025445003993809223, "lr": 0.00044403889540268847, "grad_norm": 0.13024379312992096, "wall_ms": 468319}
{"step": 33800, "loss": 0.2679850459098816, "loss_nce": 0.2677385210990906, "loss_mse": 0.002465164056047797, "lr": 0.00044370987279862187, "grad_norm": 0.1346324235200882, "wall_ms": 469556}
{"step": 33900, "loss": 0.27896031737327576, "loss_nce": 0.2787061929702759, "loss_mse": 0.002541294787079096, "lr": 0.0004433800112723891, "grad_norm": 0.1447286158800125, "wall_ms": 470798}
{"step": 34000, "loss": 0.25585272908210754, "loss_nce": 0.25560545921325684, "loss_mse": 0.0024726621340960264, "lr": 0.0004430493122904113, "grad_norm": 0.13835862278938293, "wall_ms": 472042}
{"step": 34100, "loss": 0.27697083353996277, "loss_nce": 0.27672216296195984, "loss_mse": 0.0024867583997547626, "lr": 0.0004427177773228325, "grad_norm": 0.1408596783876419, "wall_ms": 473288}
{"step": 34200, "loss": 0.3050183653831482, "loss_nce": 0.3047656714916229, "loss_mse": 0.00252704625017941, "lr": 0.00044238540784351336, "grad_norm": 0.15255188941955566, "wall_ms": 474545}
{"step": 34300, "loss": 0.287700891494751, "loss_nce": 0.2874479293823242, "loss_mse": 0.002529545221477747, "lr": 0.00044205220533002416, "grad_norm": 0.14449442923069, "wall_ms": 475807}
{"step": 34400, "loss": 0.2683241367340088, "loss_nce": 0.26807838678359985, "loss_mse": 0.0024574571289122105, "lr": 0.00044171817126363865, "grad_norm": 0.14641089737415314, "wall_ms": 477069}
{"step": 34500, "loss": 0.3003206253051758, "loss_nce": 0.3000694215297699, "loss_mse": 0.0025120186619460583, "lr": 0.0004413833071293274, "grad_norm": 0.14233998954296112, "wall_ms": 478332}
{"step": 34600, "loss": 0.2925826907157898, "loss_nce": 0.29233434796333313, "loss_mse": 0.0024834279902279377, "lr": 0.0004410476144157509, "grad_norm": 0.1479896754026413, "wall_ms": 479597}
{"step": 34700, "loss": 0.2787720859050751, "loss_nce": 0.2785152196884155, "loss_mse": 0.0025687229353934526, "lr": 0.000440711094615253, "grad_norm": 0.1362948715686798, "wall_ms": 480859}
{"step": 34800, "loss": 0.2741716504096985, "loss_nce": 0.273918092250824, "loss_mse": 0.0025354453828185797, "lr": 0.0004403737492238549, "grad_norm": 0.13221260905265808, "wall_ms": 482121}
{"step": 34900, "loss": 0.28072142601013184, "loss_nce": 0.2804705500602722, "loss_mse": 0.0025088656693696976, "lr": 0.0004400355797412478, "grad_norm": 0.1397557556629181, "wall_ms": 483380}
{"step": 35000, "loss": 0.28203529119491577, "loss_nce": 0.2817894220352173, "loss_mse": 0.002458741655573249, "lr": 0.0004396965876707861, "grad_norm": 0.14019973576068878, "wall_ms": 484645}
{"step": 35100, "loss": 0.27325206995010376, "loss_nce": 0.27300262451171875, "loss_mse": 0.0024945377372205257, "lr": 0.0004393567745194816, "grad_norm": 0.13893675804138184, "wall_ms": 485915}
{"step": 35200, "loss": 0.2853909432888031, "loss_nce": 0.28514084219932556, "loss_mse": 0.0025010458193719387, "lr": 0.000439016141797996, "grad_norm": 0.14358974993228912, "wall_ms": 487184}
{"step": 35300, "loss": 0.2830928564071655, "loss_nce": 0.2828427255153656, "loss_mse": 0.0025013466365635395, "lr": 0.00043867469102063444, "grad_norm": 0.14301279187202454, "wall_ms": 488453}
{"step": 35400, "loss": 0.2511541545391083, "loss_nce": 0.25089967250823975, "loss_mse": 0.002544689690694213, "lr": 0.00043833242370533887, "grad_norm": 0.11611019819974899, "wall_ms": 489720}
{"step": 35500, "loss": 0.2858252227306366, "loss_nce": 0.2855686843395233, "loss_mse": 0.002565238857641816, "lr": 0.00043798934137368123, "grad_norm": 0.1386033445596695, "wall_ms": 490982}
{"step": 35600, "loss": 0.2520877718925476, "loss_nce": 0.25183406472206116, "loss_mse": 0.002537036780267954, "lr": 0.0004376454455508566, "grad_norm": 0.13657626509666443, "wall_ms": 492271}
{"step": 35700, "loss": 0.2756311297416687, "loss_nce": 0.2753811180591583, "loss_mse": 0.002500127535313368, "lr": 0.00043730073776567635, "grad_norm": 0.14852382242679596, "wall_ms": 493612}
{"step": 35800, "loss": 0.31155166029930115, "loss_nce": 0.31129777431488037, "loss_mse": 0.00253895646892488, "lr": 0.00043695521955056186, "grad_norm": 0.13961268961429596, "wall_ms": 494944}
{"step": 35900, "loss": 0.27390867471694946, "loss_nce": 0.27365973591804504, "loss_mse": 0.0024895176757127047, "lr": 0.00043660889244153696, "grad_norm": 0.1351022571325302, "wall_ms": 496278}
{"step": 36000, "loss": 0.27505114674568176, "loss_nce": 0.27479857206344604, "loss_mse": 0.0025256709195673466, "lr": 0.0004362617579782217, "grad_norm": 0.1334162801504135, "wall_ms": 497610}
{"step": 36100, "loss": 0.24519188702106476, "loss_nce": 0.24493727087974548, "loss_mse": 0.0025461530312895775, "lr": 0.0004359138177038252, "grad_norm": 0.13491204380989075, "wall_ms": 498943}
{"step": 36200, "loss": 0.2978513240814209, "loss_nce": 0.29760441184043884, "loss_mse": 0.0024691708385944366, "lr": 0.0004355650731651388, "grad_norm": 0.14512234926223755, "wall_ms": 500265}
{"step": 36300, "loss": 0.24854683876037598, "loss_nce": 0.24829691648483276, "loss_mse": 0.00249916291795671, "lr": 0.00043521552591252926, "grad_norm": 0.12222205847501755, "wall_ms": 501594}
{"step": 36400, "loss": 0.2750459909439087, "loss_nce": 0.27479398250579834, "loss_mse": 0.0025201833341270685, "lr": 0.00043486517749993206, "grad_norm": 0.1380145251750946, "wall_ms": 502862}
{"step": 36500, "loss": 0.26555120944976807, "loss_nce": 0.2653006911277771, "loss_mse": 0.0025051350239664316, "lr": 0.00043451402948484403, "grad_norm": 0.13797487318515778, "wall_ms": 504127}
{"step": 36600, "loss": 0.28780239820480347, "loss_nce": 0.2875446081161499, "loss_mse": 0.002578033832833171, "lr": 0.00043416208342831686, "grad_norm": 0.14141391217708588, "wall_ms": 505403}
{"step": 36700, "loss": 0.27490517497062683, "loss_nce": 0.274649441242218, "loss_mse": 0.002557355910539627, "lr": 0.00043380934089494997, "grad_norm": 0.15022170543670654, "wall_ms": 506667}
{"step": 36800, "loss": 0.29177218675613403, "loss_nce": 0.2915215492248535, "loss_mse": 0.0025062267668545246, "lr": 0.00043345580345288334, "grad_norm": 0.14460088312625885, "wall_ms": 507947}
{"step": 36900, "loss": 0.28964224457740784, "loss_nce": 0.2893926501274109, "loss_mse": 0.002495958935469389, "lr": 0.0004331014726737911, "grad_norm": 0.14591825008392334, "wall_ms": 509217}
{"step": 37000, "loss": 0.259192556142807, "loss_nce": 0.2589452862739563, "loss_mse": 0.002472737105563283, "lr": 0.0004327463501328739, "grad_norm": 0.13099034130573273, "wall_ms": 510505}
{"step": 37100, "loss": 0.3333771228790283, "loss_nce": 0.33312588930130005, "loss_mse": 0.002512267790734768, "lr": 0.0004323904374088526, "grad_norm": 0.1654055118560791, "wall_ms": 511799}
{"step": 37200, "loss": 0.2410980463027954, "loss_nce": 0.2408483922481537, "loss_mse": 0.0024965102784335613, "lr": 0.0004320337360839605, "grad_norm": 0.11821479350328445, "wall_ms": 513078}
{"step": 37300, "loss": 0.29465463757514954, "loss_nce": 0.2943989634513855, "loss_mse": 0.0025568034034222364, "lr": 0.00043167624774393694, "grad_norm": 0.1387961059808731, "wall_ms": 514342}
{"step": 37400, "loss": 0.2869778275489807, "loss_nce": 0.28672850131988525, "loss_mse": 0.0024932099040597677, "lr": 0.0004313179739780198, "grad_norm": 0.13584040105342865, "wall_ms": 515610}
{"step": 37500, "loss": 0.30556538701057434, "loss_nce": 0.3053145408630371, "loss_mse": 0.0025085899978876114, "lr": 0.00043095891637893866, "grad_norm": 0.14352087676525116, "wall_ms": 516880}
{"step": 37600, "loss": 0.26063644886016846, "loss_nce": 0.26038965582847595, "loss_mse": 0.0024679473135620356, "lr": 0.00043059907654290774, "grad_norm": 0.13794700801372528, "wall_ms": 518172}
{"step": 37700, "loss": 0.24357277154922485, "loss_nce": 0.24332715570926666, "loss_mse": 0.002456217771396041, "lr": 0.0004302384560696188, "grad_norm": 0.135965034365654, "wall_ms": 519466}
{"step": 37800, "loss": 0.2564140260219574, "loss_nce": 0.25616323947906494, "loss_mse": 0.0025077280588448048, "lr": 0.00042987705656223385, "grad_norm": 0.13008084893226624, "wall_ms": 520767}
{"step": 37900, "loss": 0.2595241069793701, "loss_nce": 0.2592724859714508, "loss_mse": 0.0025161043740808964, "lr": 0.00042951487962737824, "grad_norm": 0.13541944324970245, "wall_ms": 522076}
{"step": 38000, "loss": 0.2617917060852051, "loss_nce": 0.26153886318206787, "loss_mse": 0.002528340555727482, "lr": 0.00042915192687513335, "grad_norm": 0.13778267800807953, "wall_ms": 523354}
{"step": 38100, "loss": 0.2684234082698822, "loss_nce": 0.26817208528518677, "loss_mse": 0.002513172337785363, "lr": 0.0004287881999190295, "grad_norm": 0.12952697277069092, "wall_ms": 524639}
{"step": 38200, "loss": 0.24834385514259338, "loss_nce": 0.24809680879116058, "loss_mse": 0.002470459556207061, "lr": 0.0004284237003760389, "grad_norm": 0.13080494105815887, "wall_ms": 525944}
{"step": 38300, "loss": 0.2692653238773346, "loss_nce": 0.26901066303253174, "loss_mse": 0.00254665850661695, "lr": 0.0004280584298665682, "grad_norm": 0.13462947309017181, "wall_ms": 527228}
{"step": 38400, "loss": 0.27793118357658386, "loss_nce": 0.2776764929294586, "loss_mse": 0.002547029173001647, "lr": 0.00042769239001445153, "grad_norm": 0.14411970973014832, "wall_ms": 528500}
{"step": 38500, "loss": 0.25563469529151917, "loss_nce": 0.25537949800491333, "loss_mse": 0.002552029909566045, "lr": 0.0004273255824469432, "grad_norm": 0.12838564813137054, "wall_ms": 529788}
{"step": 38600, "loss": 0.28298044204711914, "loss_nce": 0.28272661566734314, "loss_mse": 0.0025382658932358027, "lr": 0.00042695800879471036, "grad_norm": 0.1372639238834381, "wall_ms": 531061}
{"step": 38700, "loss": 0.2292727828025818, "loss_nce": 0.2290286123752594, "loss_mse": 0.0024416642263531685, "lr": 0.0004265896706918259, "grad_norm": 0.1194126307964325, "wall_ms": 532316}
{"step": 38800, "loss": 0.2830027639865875, "loss_nce": 0.2827532887458801, "loss_mse": 0.002494841581210494, "lr": 0.0004262205697757612, "grad_norm": 0.1449250876903534, "wall_ms": 533565}
{"step": 38900, "loss": 0.28305432200431824, "loss_nce": 0.28279978036880493, "loss_mse": 0.0025453385896980762, "lr": 0.00042585070768737845, "grad_norm": 0.14251121878623962, "wall_ms": 534806}
{"step": 39000, "loss": 0.23161809146404266, "loss_nce": 0.23136979341506958, "loss_mse": 0.002483042422682047, "lr": 0.0004254800860709241, "grad_norm": 0.11422480642795563, "wall_ms": 536044}
{"step": 39100, "loss": 0.24898894131183624, "loss_nce": 0.24873602390289307, "loss_mse": 0.0025292369537055492, "lr": 0.00042510870657402097, "grad_norm": 0.13195155560970306, "wall_ms": 537285}
{"step": 39200, "loss": 0.2745080888271332, "loss_nce": 0.2742583155632019, "loss_mse": 0.002497688867151737, "lr": 0.000424736570847661, "grad_norm": 0.1525016725063324, "wall_ms": 538526}
{"step": 39300, "loss": 0.26990050077438354, "loss_nce": 0.2696496248245239, "loss_mse": 0.002508745761588216, "lr": 0.00042436368054619804, "grad_norm": 0.1605965793132782, "wall_ms": 539764}
{"step": 39400, "loss": 0.24071381986141205, "loss_nce": 0.2404640167951584, "loss_mse": 0.0024980094749480486, "lr": 0.0004239900373273406, "grad_norm": 0.12004563212394714, "wall_ms": 541006}
{"step": 39500, "loss": 0.25628775358200073, "loss_nce": 0.2560369670391083, "loss_mse": 0.0025078465696424246, "lr": 0.00042361564285214404, "grad_norm": 0.12414488941431046, "wall_ms": 542245}
{"step": 39600, "loss": 0.2829316556453705, "loss_nce": 0.2826799750328064, "loss_mse": 0.002516705309972167, "lr": 0.0004232404987850039, "grad_norm": 0.1452670842409134, "wall_ms": 543484}
{"step": 39700, "loss": 0.283442884683609, "loss_nce": 0.28319019079208374, "loss_mse": 0.0025270269252359867, "lr": 0.0004228646067936475, "grad_norm": 0.1352480947971344, "wall_ms": 544726}
{"step": 39800, "loss": 0.27595290541648865, "loss_nce": 0.2757018804550171, "loss_mse": 0.0025102258659899235, "lr": 0.00042248796854912776, "grad_norm": 0.1361416131258011, "wall_ms": 545977}
{"step": 39900, "loss": 0.2634161114692688, "loss_nce": 0.26316505670547485, "loss_mse": 0.002510449616238475, "lr": 0.0004221105857258146, "grad_norm": 0.13788071274757385, "wall_ms": 547229}
{"step": 40000, "loss": 0.27788156270980835, "loss_nce": 0.27763208746910095, "loss_mse": 0.0024947344791144133, "lr": 0.00042173246000138824, "grad_norm": 0.14313602447509766, "wall_ms": 548486}
{"step": 40100, "loss": 0.3072395622730255, "loss_nce": 0.3069833517074585, "loss_mse": 0.0025621831882745028, "lr": 0.0004213535930568317, "grad_norm": 0.1538955420255661, "wall_ms": 561706}
{"step": 40200, "loss": 0.2742522954940796, "loss_nce": 0.27400049567222595, "loss_mse": 0.0025180738884955645, "lr": 0.0004209739865764226, "grad_norm": 0.13196054100990295, "wall_ms": 562944}
{"step": 40300, "loss": 0.2578528821468353, "loss_nce": 0.25760024785995483, "loss_mse": 0.0025263032875955105, "lr": 0.0004205936422477267, "grad_norm": 0.12688741087913513, "wall_ms": 564185}
{"step": 40400, "loss": 0.2785269021987915, "loss_nce": 0.2782744765281677, "loss_mse": 0.0025243028067052364, "lr": 0.00042021256176158977, "grad_norm": 0.13662980496883392, "wall_ms": 565422}
{"step": 40500, "loss": 0.2853080928325653, "loss_nce": 0.2850547432899475, "loss_mse": 0.0025336286053061485, "lr": 0.0004198307468121303, "grad_norm": 0.14669136703014374, "wall_ms": 566685}
{"step": 40600, "loss": 0.2757102847099304, "loss_nce": 0.2754577398300171, "loss_mse": 0.0025253889616578817, "lr": 0.0004194481990967316, "grad_norm": 0.14120206236839294, "wall_ms": 567935}
{"step": 40700, "loss": 0.25748518109321594, "loss_nce": 0.25723546743392944, "loss_mse": 0.0024970383383333683, "lr": 0.0004190649203160348, "grad_norm": 0.13429144024848938, "wall_ms": 569171}
{"step": 40800, "loss": 0.2252916842699051, "loss_nce": 0.22503983974456787, "loss_mse": 0.002518427325412631, "lr": 0.00041868091217393095, "grad_norm": 0.1226973831653595, "wall_ms": 570408}
{"step": 40900, "loss": 0.24704153835773468, "loss_nce": 0.24679476022720337, "loss_mse": 0.002467817161232233, "lr": 0.00041829617637755364, "grad_norm": 0.1291239857673645, "wall_ms": 571652}
{"step": 41000, "loss": 0.3101881444454193, "loss_nce": 0.30993789434432983, "loss_mse": 0.0025025305803865194, "lr": 0.0004179107146372711, "grad_norm": 0.14249292016029358, "wall_ms": 572907}
{"step": 41100, "loss": 0.2794809341430664, "loss_nce": 0.27923110127449036, "loss_mse": 0.0024982858449220657, "lr": 0.0004175245286666788, "grad_norm": 0.13546453416347504, "wall_ms": 574150}
{"step": 41200, "loss": 0.30982518196105957, "loss_nce": 0.3095725476741791, "loss_mse": 0.0025263845454901457, "lr": 0.00041713762018259206, "grad_norm": 0.15356895327568054, "wall_ms": 575389}
{"step": 41300, "loss": 0.26359128952026367, "loss_nce": 0.26334068179130554, "loss_mse": 0.002506213029846549, "lr": 0.0004167499909050379, "grad_norm": 0.13516263663768768, "wall_ms": 576637}
{"step": 41400, "loss": 0.2530616223812103, "loss_nce": 0.2528107762336731, "loss_mse": 0.0025085073430091143, "lr": 0.0004163616425572479, "grad_norm": 0.11535057425498962, "wall_ms": 577887}
{"step": 41500, "loss": 0.23524311184883118, "loss_nce": 0.23499390482902527, "loss_mse": 0.0024921137373894453, "lr": 0.0004159725768656501, "grad_norm": 0.11990891396999359, "wall_ms": 579137}
{"step": 41600, "loss": 0.2322789877653122, "loss_nce": 0.23203352093696594, "loss_mse": 0.0024546205531805754, "lr": 0.00041558279555986176, "grad_norm": 0.12473414838314056, "wall_ms": 580394}
{"step": 41700, "loss": 0.25975102186203003, "loss_nce": 0.25950121879577637, "loss_mse": 0.002498175948858261, "lr": 0.0004151923003726813, "grad_norm": 0.1349746584892273, "wall_ms": 581659}
{"step": 41800, "loss": 0.2660742700099945, "loss_nce": 0.26582399010658264, "loss_mse": 0.0025027990341186523, "lr": 0.00041480109304008075, "grad_norm": 0.1394272744655609, "wall_ms": 582965}
{"step": 41900, "loss": 0.265229731798172, "loss_nce": 0.2649877071380615, "loss_mse": 0.002420388627797365, "lr": 0.00041440917530119817, "grad_norm": 0.13899864256381989, "wall_ms": 584273}
{"step": 42000, "loss": 0.24645119905471802, "loss_nce": 0.2462020218372345, "loss_mse": 0.002491745864972472, "lr": 0.0004140165488983296, "grad_norm": 0.13268764317035675, "wall_ms": 585576}
{"step": 42100, "loss": 0.27747127413749695, "loss_nce": 0.2772176265716553, "loss_mse": 0.0025363697204738855, "lr": 0.00041362321557692145, "grad_norm": 0.1387041211128235, "wall_ms": 586862}
{"step": 42200, "loss": 0.2580883800983429, "loss_nce": 0.25783589482307434, "loss_mse": 0.0025248404126614332, "lr": 0.0004132291770855631, "grad_norm": 0.1452980488538742, "wall_ms": 588129}
{"step": 42300, "loss": 0.28446969389915466, "loss_nce": 0.2842206358909607, "loss_mse": 0.0024904529564082623, "lr": 0.00041283443517597864, "grad_norm": 0.15084999799728394, "wall_ms": 589418}
{"step": 42400, "loss": 0.23781298100948334, "loss_nce": 0.23756620287895203, "loss_mse": 0.002467760583385825, "lr": 0.00041243899160301893, "grad_norm": 0.12734092772006989, "wall_ms": 590730}
{"step": 42500, "loss": 0.2566315829753876, "loss_nce": 0.2563799023628235, "loss_mse": 0.0025168927386403084, "lr": 0.0004120428481246548, "grad_norm": 0.1300695836544037, "wall_ms": 592032}
{"step": 42600, "loss": 0.2901032567024231, "loss_nce": 0.2898558974266052, "loss_mse": 0.0024735049810260534, "lr": 0.00041164600650196796, "grad_norm": 0.15022575855255127, "wall_ms": 593322}
{"step": 42700, "loss": 0.27125993371009827, "loss_nce": 0.27100616693496704, "loss_mse": 0.0025375608820468187, "lr": 0.0004112484684991439, "grad_norm": 0.1409384161233902, "wall_ms": 594615}
{"step": 42800, "loss": 0.2692914605140686, "loss_nce": 0.2690359055995941, "loss_mse": 0.0025556532200425863, "lr": 0.00041085023588346417, "grad_norm": 0.13357959687709808, "wall_ms": 595892}
{"step": 42900, "loss": 0.2794671356678009, "loss_nce": 0.2792165279388428, "loss_mse": 0.002506083343178034, "lr": 0.00041045131042529787, "grad_norm": 0.1333581954240799, "wall_ms": 597146}
{"step": 43000, "loss": 0.2604356110095978, "loss_nce": 0.26018691062927246, "loss_mse": 0.002486869925633073, "lr": 0.00041005169389809467, "grad_norm": 0.13770408928394318, "wall_ms": 598396}
{"step": 43100, "loss": 0.2582225501537323, "loss_nce": 0.25797039270401, "loss_mse": 0.002521639224141836, "lr": 0.0004096513880783759, "grad_norm": 0.13860835134983063, "wall_ms": 599666}
{"step": 43200, "loss": 0.29210200905799866, "loss_nce": 0.2918505072593689, "loss_mse": 0.0025149472057819366, "lr": 0.00040925039474572764, "grad_norm": 0.14916355907917023, "wall_ms": 600954}
{"step": 43300, "loss": 0.29586800932884216, "loss_nce": 0.2956174612045288, "loss_mse": 0.002505549229681492, "lr": 0.00040884871568279196, "grad_norm": 0.14863349497318268, "wall_ms": 602232}
{"step": 43400, "loss": 0.27340030670166016, "loss_nce": 0.27315056324005127, "loss_mse": 0.0024974027182906866, "lr": 0.0004084463526752598, "grad_norm": 0.14132408797740936, "wall_ms": 603511}
{"step": 43500, "loss": 0.2618614733219147, "loss_nce": 0.2616138458251953, "loss_mse": 0.002476142253726721, "lr": 0.00040804330751186224, "grad_norm": 0.1302427351474762, "wall_ms": 604795}
{"step": 43600, "loss": 0.2900598347187042, "loss_nce": 0.28981226682662964, "loss_mse": 0.0024756055790930986, "lr": 0.00040763958198436316, "grad_norm": 0.13899539411067963, "wall_ms": 606085}
{"step": 43700, "loss": 0.25992920994758606, "loss_nce": 0.2596827447414398, "loss_mse": 0.0024646595120429993, "lr": 0.00040723517788755103, "grad_norm": 0.13105890154838562, "wall_ms": 607353}
{"step": 43800, "loss": 0.28307849168777466, "loss_nce": 0.2828270196914673, "loss_mse": 0.002514776075258851, "lr": 0.00040683009701923076, "grad_norm": 0.14634229242801666, "wall_ms": 608606}
{"step": 43900, "loss": 0.2553437650203705, "loss_nce": 0.2550988793373108, "loss_mse": 0.002448925981298089, "lr": 0.00040642434118021614, "grad_norm": 0.1314428597688675, "wall_ms": 609865}
{"step": 44000, "loss": 0.291538804769516, "loss_nce": 0.2912822663784027, "loss_mse": 0.0025653636548668146, "lr": 0.0004060179121743214, "grad_norm": 0.14342066645622253, "wall_ms": 611143}
{"step": 44100, "loss": 0.28875768184661865, "loss_nce": 0.28850269317626953, "loss_mse": 0.0025500350166112185, "lr": 0.00040561081180835344, "grad_norm": 0.13357481360435486, "wall_ms": 612423}
{"step": 44200, "loss": 0.2969813644886017, "loss_nce": 0.29672956466674805, "loss_mse": 0.0025178748182952404, "lr": 0.0004052030418921038, "grad_norm": 0.12989267706871033, "wall_ms": 613693}
{"step": 44300, "loss": 0.28614482283592224, "loss_nce": 0.28589552640914917, "loss_mse": 0.002493027364835143, "lr": 0.0004047946042383406, "grad_norm": 0.13371555507183075, "wall_ms": 614960}
{"step": 44400, "loss": 0.24243082106113434, "loss_nce": 0.2421814650297165, "loss_mse": 0.002493491629138589, "lr": 0.00040438550066280017, "grad_norm": 0.12537816166877747, "wall_ms": 616226}
{"step": 44500, "loss": 0.2528890073299408, "loss_nce": 0.2526346445083618, "loss_mse": 0.0025435241404920816, "lr": 0.0004039757329841793, "grad_norm": 0.1305360347032547, "wall_ms": 617519}
{"step": 44600, "loss": 0.28991949558258057, "loss_nce": 0.2896667718887329, "loss_mse": 0.0025273466017097235, "lr": 0.0004035653030241274, "grad_norm": 0.1472545713186264, "wall_ms": 618776}
{"step": 44700, "loss": 0.23379060626029968, "loss_nce": 0.23354387283325195, "loss_mse": 0.0024673612788319588, "lr": 0.00040315421260723783, "grad_norm": 0.1211959570646286, "wall_ms": 620025}
{"step": 44800, "loss": 0.27102142572402954, "loss_nce": 0.2707676291465759, "loss_mse": 0.0025378530845046043, "lr": 0.00040274246356104007, "grad_norm": 0.1491682529449463, "wall_ms": 621302}
{"step": 44900, "loss": 0.29150477051734924, "loss_nce": 0.29125767946243286, "loss_mse": 0.002470856998115778, "lr": 0.0004023300577159916, "grad_norm": 0.15382419526576996, "wall_ms": 622560}
{"step": 45000, "loss": 0.25952649116516113, "loss_nce": 0.25927436351776123, "loss_mse": 0.002521166577935219, "lr": 0.0004019169969054698, "grad_norm": 0.13711673021316528, "wall_ms": 623824}
{"step": 45100, "loss": 0.23755502700805664, "loss_nce": 0.2373056709766388, "loss_mse": 0.0024935132823884487, "lr": 0.00040150328296576366, "grad_norm": 0.1269952952861786, "wall_ms": 625093}
{"step": 45200, "loss": 0.30055126547813416, "loss_nce": 0.3003026843070984, "loss_mse": 0.0024858410470187664, "lr": 0.00040108891773606587, "grad_norm": 0.13880617916584015, "wall_ms": 626364}
{"step": 45300, "loss": 0.28775671124458313, "loss_nce": 0.28750547766685486, "loss_mse": 0.0025122312363237143, "lr": 0.0004006739030584642, "grad_norm": 0.13185039162635803, "wall_ms": 627630}
{"step": 45400, "loss": 0.2836350202560425, "loss_nce": 0.28338393568992615, "loss_mse": 0.0025108223780989647, "lr": 0.0004002582407779338, "grad_norm": 0.1345842331647873, "wall_ms": 628902}
{"step": 45500, "loss": 0.2687346637248993, "loss_nce": 0.26848870515823364, "loss_mse": 0.0024596506264060736, "lr": 0.0003998419327423286, "grad_norm": 0.1309039443731308, "wall_ms": 630168}
{"step": 45600, "loss": 0.2908487021923065, "loss_nce": 0.2905983328819275, "loss_mse": 0.002503569470718503, "lr": 0.0003994249808023736, "grad_norm": 0.14898525178432465, "wall_ms": 631435}
{"step": 45700, "loss": 0.2516072690486908, "loss_nce": 0.2513601779937744, "loss_mse": 0.002470839535817504, "lr": 0.00039900738681165594, "grad_norm": 0.13239480555057526, "wall_ms": 632707}
{"step": 45800, "loss": 0.2995431125164032, "loss_nce": 0.29929375648498535, "loss_mse": 0.0024935125838965178, "lr": 0.0003985891526266172, "grad_norm": 0.1436568945646286, "wall_ms": 633976}
{"step": 45900, "loss": 0.2524087429046631, "loss_nce": 0.2521559000015259, "loss_mse": 0.002528504701331258, "lr": 0.00039817028010654505, "grad_norm": 0.13225394487380981, "wall_ms": 635266}
{"step": 46000, "loss": 0.2778852581977844, "loss_nce": 0.2776280641555786, "loss_mse": 0.002571987686678767, "lr": 0.00039775077111356487, "grad_norm": 0.13949480652809143, "wall_ms": 636559}
{"step": 46100, "loss": 0.28346899151802063, "loss_nce": 0.28321778774261475, "loss_mse": 0.002511948812752962, "lr": 0.0003973306275126315, "grad_norm": 0.1417447328567505, "wall_ms": 637851}
{"step": 46200, "loss": 0.24316991865634918, "loss_nce": 0.24291770160198212, "loss_mse": 0.0025221596006304026, "lr": 0.000396909851171521, "grad_norm": 0.13098391890525818, "wall_ms": 639139}
{"step": 46300, "loss": 0.2915404736995697, "loss_nce": 0.29128527641296387, "loss_mse": 0.002552117221057415, "lr": 0.0003964884439608222, "grad_norm": 0.1589595228433609, "wall_ms": 640418}
{"step": 46400, "loss": 0.26211392879486084, "loss_nce": 0.2618614137172699, "loss_mse": 0.0025251111947000027, "lr": 0.00039606640775392875, "grad_norm": 0.13608232140541077, "wall_ms": 641688}
{"step": 46500, "loss": 0.2866620123386383, "loss_nce": 0.28641119599342346, "loss_mse": 0.0025080156046897173, "lr": 0.0003956437444270303, "grad_norm": 0.14453475177288055, "wall_ms": 642981}
{"step": 46600, "loss": 0.274399995803833, "loss_nce": 0.27414652705192566, "loss_mse": 0.0025346698239445686, "lr": 0.00039522045585910453, "grad_norm": 0.13579803705215454, "wall_ms": 644290}
{"step": 46700, "loss": 0.24119724333286285, "loss_nce": 0.24095110595226288, "loss_mse": 0.00246138172224164, "lr": 0.00039479654393190873, "grad_norm": 0.1304987221956253, "wall_ms": 645595}
{"step": 46800, "loss": 0.2816568613052368, "loss_nce": 0.28140705823898315, "loss_mse": 0.00249792099930346, "lr": 0.00039437201052997114, "grad_norm": 0.13073211908340454, "wall_ms": 646901}
{"step": 46900, "loss": 0.2522844970226288, "loss_nce": 0.25203603506088257, "loss_mse": 0.002484559779986739, "lr": 0.00039394685754058303, "grad_norm": 0.11521914601325989, "wall_ms": 648531}
{"step": 47000, "loss": 0.2880878150463104, "loss_nce": 0.28783079981803894, "loss_mse": 0.002570143900811672, "lr": 0.00039352108685379, "grad_norm": 0.1498861014842987, "wall_ms": 649817}
{"step": 47100, "loss": 0.22773709893226624, "loss_nce": 0.2274925857782364, "loss_mse": 0.002445203484967351, "lr": 0.0003930947003623837, "grad_norm": 0.10763931274414062, "wall_ms": 651099}
{"step": 47200, "loss": 0.20742036402225494, "loss_nce": 0.20717164874076843, "loss_mse": 0.0024872017093002796, "lr": 0.0003926676999618932, "grad_norm": 0.11120428889989853, "wall_ms": 652372}
{"step": 47300, "loss": 0.24877774715423584, "loss_nce": 0.24853108823299408, "loss_mse": 0.0024665570817887783, "lr": 0.0003922400875505771, "grad_norm": 0.12793685495853424, "wall_ms": 653635}
{"step": 47400, "loss": 0.24213510751724243, "loss_nce": 0.24189159274101257, "loss_mse": 0.0024351959582418203, "lr": 0.0003918118650294145, "grad_norm": 0.12916837632656097, "wall_ms": 654901}
{"step": 47500, "loss": 0.24630433320999146, "loss_nce": 0.24605368077754974, "loss_mse": 0.002506505697965622, "lr": 0.00039138303430209673, "grad_norm": 0.12895379960536957, "wall_ms": 656149}
{"step": 47600, "loss": 0.24855682253837585, "loss_nce": 0.24831119179725647, "loss_mse": 0.002456279704347253, "lr": 0.00039095359727501914, "grad_norm": 0.13130024075508118, "wall_ms": 657395}
{"step": 47700, "loss": 0.225054070353508, "loss_nce": 0.22480157017707825, "loss_mse": 0.002525010844692588, "lr": 0.0003905235558572722, "grad_norm": 0.11888819187879562, "wall_ms": 658640}
{"step": 47800, "loss": 0.24468554556369781, "loss_nce": 0.2444414645433426, "loss_mse": 0.002440850716084242, "lr": 0.0003900929119606334, "grad_norm": 0.12979596853256226, "wall_ms": 659887}
{"step": 47900, "loss": 0.24773778021335602, "loss_nce": 0.24748463928699493, "loss_mse": 0.0025314760860055685, "lr": 0.0003896616674995586, "grad_norm": 0.13178379833698273, "wall_ms": 661134}
{"step": 48000, "loss": 0.2510679364204407, "loss_nce": 0.25082001090049744, "loss_mse": 0.0024792568292468786, "lr": 0.0003892298243911733, "grad_norm": 0.12568534910678864, "wall_ms": 662392}
{"step": 48100, "loss": 0.2660549283027649, "loss_nce": 0.26580455899238586, "loss_mse": 0.002503765979781747, "lr": 0.00038879738455526466, "grad_norm": 0.1403847634792328, "wall_ms": 663651}
{"step": 48200, "loss": 0.2375134378671646, "loss_nce": 0.23726332187652588, "loss_mse": 0.002501109382137656, "lr": 0.00038836434991427224, "grad_norm": 0.12998118996620178, "wall_ms": 664912}
{"step": 48300, "loss": 0.23477084934711456, "loss_nce": 0.2345244288444519, "loss_mse": 0.002464237390086055, "lr": 0.00038793072239328016, "grad_norm": 0.1112382784485817, "wall_ms": 666177}
{"step": 48400, "loss": 0.2603423595428467, "loss_nce": 0.26008927822113037, "loss_mse": 0.002530848141759634, "lr": 0.00038749650392000824, "grad_norm": 0.13123491406440735, "wall_ms": 667437}
{"step": 48500, "loss": 0.22047173976898193, "loss_nce": 0.22022350132465363, "loss_mse": 0.002482366980984807, "lr": 0.00038706169642480315, "grad_norm": 0.11864705383777618, "wall_ms": 668691}
{"step": 48600, "loss": 0.2572188377380371, "loss_nce": 0.2569665312767029, "loss_mse": 0.0025231619365513325, "lr": 0.0003866263018406302, "grad_norm": 0.12619149684906006, "wall_ms": 669954}
{"step": 48700, "loss": 0.2508695125579834, "loss_nce": 0.25062280893325806, "loss_mse": 0.002467029495164752, "lr": 0.00038619032210306483, "grad_norm": 0.12375163286924362, "wall_ms": 671229}
{"step": 48800, "loss": 0.2988276779651642, "loss_nce": 0.2985747754573822, "loss_mse": 0.0025290013290941715, "lr": 0.00038575375915028356, "grad_norm": 0.14780497550964355, "wall_ms": 672479}
{"step": 48900, "loss": 0.25407058000564575, "loss_nce": 0.2538233995437622, "loss_mse": 0.0024718029890209436, "lr": 0.0003853166149230557, "grad_norm": 0.12114930897951126, "wall_ms": 673758}
{"step": 49000, "loss": 0.2524986267089844, "loss_nce": 0.25224488973617554, "loss_mse": 0.0025372481904923916, "lr": 0.00038487889136473496, "grad_norm": 0.13498568534851074, "wall_ms": 675045}
{"step": 49100, "loss": 0.20746460556983948, "loss_nce": 0.2072175145149231, "loss_mse": 0.0024708465207368135, "lr": 0.00038444059042125, "grad_norm": 0.11292361468076706, "wall_ms": 676329}
{"step": 49200, "loss": 0.26878175139427185, "loss_nce": 0.26852893829345703, "loss_mse": 0.002528219949454069, "lr": 0.0003840017140410965, "grad_norm": 0.13475024700164795, "wall_ms": 677597}
{"step": 49300, "loss": 0.26900094747543335, "loss_nce": 0.2687531113624573, "loss_mse": 0.002478414447978139, "lr": 0.00038356226417532853, "grad_norm": 0.13455486297607422, "wall_ms": 678875}
{"step": 49400, "loss": 0.22051198780536652, "loss_nce": 0.22026486694812775, "loss_mse": 0.0024711722508072853, "lr": 0.00038312224277754916, "grad_norm": 0.11505346745252609, "wall_ms": 680154}
{"step": 49500, "loss": 0.21809370815753937, "loss_nce": 0.2178446352481842, "loss_mse": 0.0024907104671001434, "lr": 0.0003826816518039027, "grad_norm": 0.12297941744327545, "wall_ms": 681410}
{"step": 49600, "loss": 0.22888819873332977, "loss_nce": 0.22863632440567017, "loss_mse": 0.0025186927523463964, "lr": 0.00038224049321306517, "grad_norm": 0.12914946675300598, "wall_ms": 682680}
{"step": 49700, "loss": 0.23784461617469788, "loss_nce": 0.23759549856185913, "loss_mse": 0.0024912336375564337, "lr": 0.0003817987689662362, "grad_norm": 0.11917508393526077, "wall_ms": 683942}
{"step": 49800, "loss": 0.26569467782974243, "loss_nce": 0.26544296741485596, "loss_mse": 0.0025169781874865294, "lr": 0.00038135648102713003, "grad_norm": 0.1415504813194275, "wall_ms": 685212}
{"step": 49900, "loss": 0.2657649517059326, "loss_nce": 0.26551494002342224, "loss_mse": 0.0025002267211675644, "lr": 0.00038091363136196686, "grad_norm": 0.12460636347532272, "wall_ms": 686488}
{"step": 50000, "loss": 0.24717825651168823, "loss_nce": 0.24693164229393005, "loss_mse": 0.0024661500938236713, "lr": 0.000380470221939464, "grad_norm": 0.12375947833061218, "wall_ms": 687755}
{"step": 50100, "loss": 0.26037099957466125, "loss_nce": 0.26012080907821655, "loss_mse": 0.00250187492929399, "lr": 0.0003800262547308272, "grad_norm": 0.13657988607883453, "wall_ms": 701005}
{"step": 50200, "loss": 0.2215692400932312, "loss_nce": 0.22132250666618347, "loss_mse": 0.002467351732775569, "lr": 0.00037958173170974193, "grad_norm": 0.11965075880289078, "wall_ms": 702274}
{"step": 50300, "loss": 0.25216925144195557, "loss_nce": 0.2519163191318512, "loss_mse": 0.00252920133061707, "lr": 0.0003791366548523646, "grad_norm": 0.14231853187084198, "wall_ms": 703551}
{"step": 50400, "loss": 0.26152458786964417, "loss_nce": 0.2612772583961487, "loss_mse": 0.0024733373429626226, "lr": 0.0003786910261373137, "grad_norm": 0.1286580115556717, "wall_ms": 704813}
{"step": 50500, "loss": 0.27154502272605896, "loss_nce": 0.27129051089286804, "loss_mse": 0.0025452114641666412, "lr": 0.0003782448475456609, "grad_norm": 0.13082078099250793, "wall_ms": 706061}
{"step": 50600, "loss": 0.2614802122116089, "loss_nce": 0.26122796535491943, "loss_mse": 0.002522416179999709, "lr": 0.0003777981210609226, "grad_norm": 0.14161086082458496, "wall_ms": 707314}
{"step": 50700, "loss": 0.22044500708580017, "loss_nce": 0.22019648551940918, "loss_mse": 0.0024852671194821596, "lr": 0.0003773508486690508, "grad_norm": 0.1098800078034401, "wall_ms": 708562}
{"step": 50800, "loss": 0.24345020949840546, "loss_nce": 0.2432052195072174, "loss_mse": 0.0024498626589775085, "lr": 0.0003769030323584244, "grad_norm": 0.13462980091571808, "wall_ms": 709813}
{"step": 50900, "loss": 0.2706562578678131, "loss_nce": 0.2704055905342102, "loss_mse": 0.0025066800881177187, "lr": 0.0003764546741198404, "grad_norm": 0.13327181339263916, "wall_ms": 711064}
{"step": 51000, "loss": 0.22827483713626862, "loss_nce": 0.22802510857582092, "loss_mse": 0.0024972620885819197, "lr": 0.00037600577594650453, "grad_norm": 0.11954786628484726, "wall_ms": 712315}
{"step": 51100, "loss": 0.24447201192378998, "loss_nce": 0.24422529339790344, "loss_mse": 0.002467172686010599, "lr": 0.00037555633983402353, "grad_norm": 0.12240054458379745, "wall_ms": 713586}
{"step": 51200, "loss": 0.25084856152534485, "loss_nce": 0.2505955100059509, "loss_mse": 0.0025304409209638834, "lr": 0.00037510636778039516, "grad_norm": 0.1252833753824234, "wall_ms": 714870}
{"step": 51300, "loss": 0.27890142798423767, "loss_nce": 0.27864840626716614, "loss_mse": 0.002530344296246767, "lr": 0.00037465586178599965, "grad_norm": 0.14564555883407593, "wall_ms": 716140}
{"step": 51400, "loss": 0.2339560091495514, "loss_nce": 0.23370350897312164, "loss_mse": 0.0025250131729990244, "lr": 0.00037420482385359114, "grad_norm": 0.12452518939971924, "wall_ms": 717384}
{"step": 51500, "loss": 0.27037665247917175, "loss_nce": 0.2701270878314972, "loss_mse": 0.0024956902489066124, "lr": 0.00037375325598828845, "grad_norm": 0.14064675569534302, "wall_ms": 718628}
{"step": 51600, "loss": 0.2888162136077881, "loss_nce": 0.28856685757637024, "loss_mse": 0.0024934285320341587, "lr": 0.00037330116019756646, "grad_norm": 0.13512954115867615, "wall_ms": 719869}
{"step": 51700, "loss": 0.21251432597637177, "loss_nce": 0.21226829290390015, "loss_mse": 0.002460351912304759, "lr": 0.0003728485384912465, "grad_norm": 0.11329604685306549, "wall_ms": 721109}
{"step": 51800, "loss": 0.30147743225097656, "loss_nce": 0.3012293875217438, "loss_mse": 0.002480545546859503, "lr": 0.00037239539288148833, "grad_norm": 0.14654897153377533, "wall_ms": 722349}
{"step": 51900, "loss": 0.2516302466392517, "loss_nce": 0.25138139724731445, "loss_mse": 0.0024884208105504513, "lr": 0.0003719417253827805, "grad_norm": 0.12370840460062027, "wall_ms": 723592}
{"step": 52000, "loss": 0.2978788912296295, "loss_nce": 0.2976272702217102, "loss_mse": 0.002516339998692274, "lr": 0.00037148753801193187, "grad_norm": 0.1446738988161087, "wall_ms": 724830}
{"step": 52100, "loss": 0.30131566524505615, "loss_nce": 0.30106407403945923, "loss_mse": 0.0025159604847431183, "lr": 0.0003710328327880623, "grad_norm": 0.1478477418422699, "wall_ms": 726081}
{"step": 52200, "loss": 0.25631335377693176, "loss_nce": 0.2560615837574005, "loss_mse": 0.0025176273193210363, "lr": 0.0003705776117325936, "grad_norm": 0.13403074443340302, "wall_ms": 727324}
{"step": 52300, "loss": 0.25122982263565063, "loss_nce": 0.25098326802253723, "loss_mse": 0.002465482335537672, "lr": 0.00037012187686924123, "grad_norm": 0.12785978615283966, "wall_ms": 728567}
{"step": 52400, "loss": 0.25099414587020874, "loss_nce": 0.25074484944343567, "loss_mse": 0.002492868574336171, "lr": 0.00036966563022400426, "grad_norm": 0.13343605399131775, "wall_ms": 729807}
{"step": 52500, "loss": 0.26197439432144165, "loss_nce": 0.2617225646972656, "loss_mse": 0.0025184256955981255, "lr": 0.00036920887382515746, "grad_norm": 0.12321730703115463, "wall_ms": 731046}
{"step": 52600, "loss": 0.2574833333492279, "loss_nce": 0.2572304606437683, "loss_mse": 0.0025287794414907694, "lr": 0.00036875160970324135, "grad_norm": 0.12698858976364136, "wall_ms": 732284}
{"step": 52700, "loss": 0.2720482647418976, "loss_nce": 0.27179771661758423, "loss_mse": 0.0025053934659808874, "lr": 0.00036829383989105363, "grad_norm": 0.1284351646900177, "wall_ms": 733539}
{"step": 52800, "loss": 0.22931702435016632, "loss_nce": 0.22906582057476044, "loss_mse": 0.002512072678655386, "lr": 0.00036783556642364025, "grad_norm": 0.12828092277050018, "wall_ms": 734793}
{"step": 52900, "loss": 0.2933964729309082, "loss_nce": 0.2931417226791382, "loss_mse": 0.002547420794144273, "lr": 0.00036737679133828606, "grad_norm": 0.14441613852977753, "wall_ms": 736038}
{"step": 53000, "loss": 0.23534588515758514, "loss_nce": 0.2350911945104599, "loss_mse": 0.0025468941312283278, "lr": 0.0003669175166745059, "grad_norm": 0.1268780529499054, "wall_ms": 737283}
{"step": 53100, "loss": 0.2583332359790802, "loss_nce": 0.2580810785293579, "loss_mse": 0.0025215023197233677, "lr": 0.0003664577444740355, "grad_norm": 0.13365019857883453, "wall_ms": 738528}
{"step": 53200, "loss": 0.26508161425590515, "loss_nce": 0.26483285427093506, "loss_mse": 0.0024877393152564764, "lr": 0.0003659974767808225, "grad_norm": 0.134281724691391, "wall_ms": 739772}
{"step": 53300, "loss": 0.28546667098999023, "loss_nce": 0.2852148413658142, "loss_mse": 0.002518349327147007, "lr": 0.0003655367156410173, "grad_norm": 0.13834238052368164, "wall_ms": 741031}
{"step": 53400, "loss": 0.22023944556713104, "loss_nce": 0.2199934422969818, "loss_mse": 0.002460043877363205, "lr": 0.00036507546310296377, "grad_norm": 0.11419736593961716, "wall_ms": 742292}
{"step": 53500, "loss": 0.26184695959091187, "loss_nce": 0.2615928053855896, "loss_mse": 0.0025415755808353424, "lr": 0.0003646137212171905, "grad_norm": 0.13128302991390228, "wall_ms": 743546}
{"step": 53600, "loss": 0.246309295296669, "loss_nce": 0.24605661630630493, "loss_mse": 0.0025267223827540874, "lr": 0.0003641514920364015, "grad_norm": 0.1299111396074295, "wall_ms": 744785}
{"step": 53700, "loss": 0.25904613733291626, "loss_nce": 0.25879496335983276, "loss_mse": 0.002511869417503476, "lr": 0.00036368877761546673, "grad_norm": 0.12833420932292938, "wall_ms": 746030}
{"step": 53800, "loss": 0.2352043241262436, "loss_nce": 0.23494982719421387, "loss_mse": 0.0025449665263295174, "lr": 0.00036322558001141407, "grad_norm": 0.1224990263581276, "wall_ms": 747277}
{"step": 53900, "loss": 0.257035493850708, "loss_nce": 0.2567908763885498, "loss_mse": 0.0024460384156554937, "lr": 0.0003627619012834188, "grad_norm": 0.1295890361070633, "wall_ms": 748532}
{"step": 54000, "loss": 0.22984802722930908, "loss_nce": 0.2295970469713211, "loss_mse": 0.0025098228361457586, "lr": 0.00036229774349279503, "grad_norm": 0.1221577376127243, "wall_ms": 749793}
{"step": 54100, "loss": 0.2267662137746811, "loss_nce": 0.22651180624961853, "loss_mse": 0.002544128568843007, "lr": 0.00036183310870298703, "grad_norm": 0.11844368278980255, "wall_ms": 751056}
{"step": 54200, "loss": 0.27326521277427673, "loss_nce": 0.27301347255706787, "loss_mse": 0.002517548855394125, "lr": 0.00036136799897955926, "grad_norm": 0.12958380579948425, "wall_ms": 752295}
{"step": 54300, "loss": 0.2417684942483902, "loss_nce": 0.2415163218975067, "loss_mse": 0.002521747723221779, "lr": 0.00036090241639018765, "grad_norm": 0.12446071207523346, "wall_ms": 753534}
{"step": 54400, "loss": 0.28832095861434937, "loss_nce": 0.2880696654319763, "loss_mse": 0.0025128743145614862, "lr": 0.00036043636300465027, "grad_norm": 0.13636013865470886, "wall_ms": 754772}
{"step": 54500, "loss": 0.24515922367572784, "loss_nce": 0.2449086457490921, "loss_mse": 0.0025057438760995865, "lr": 0.00035996984089481805, "grad_norm": 0.119978167116642, "wall_ms": 756013}
{"step": 54600, "loss": 0.2651962339878082, "loss_nce": 0.2649378180503845, "loss_mse": 0.0025841211900115013, "lr": 0.0003595028521346458, "grad_norm": 0.1673954874277115, "wall_ms": 757255}
{"step": 54700, "loss": 0.2729504108428955, "loss_nce": 0.2726941704750061, "loss_mse": 0.0025622788816690445, "lr": 0.0003590353988001629, "grad_norm": 0.1438414454460144, "wall_ms": 758495}
{"step": 54800, "loss": 0.25254929065704346, "loss_nce": 0.25229835510253906, "loss_mse": 0.00250933482311666, "lr": 0.0003585674829694638, "grad_norm": 0.13378264009952545, "wall_ms": 759736}
{"step": 54900, "loss": 0.2145356833934784, "loss_nce": 0.21428313851356506, "loss_mse": 0.002525421790778637, "lr": 0.00035809910672269916, "grad_norm": 0.11268853396177292, "wall_ms": 760976}
{"step": 55000, "loss": 0.24396774172782898, "loss_nce": 0.24372021853923798, "loss_mse": 0.002475293120369315, "lr": 0.00035763027214206653, "grad_norm": 0.12624545395374298, "wall_ms": 762217}
{"step": 55100, "loss": 0.27312028408050537, "loss_nce": 0.2728668451309204, "loss_mse": 0.0025343908928334713, "lr": 0.0003571609813118008, "grad_norm": 0.13528287410736084, "wall_ms": 763460}
{"step": 55200, "loss": 0.2608318626880646, "loss_nce": 0.2605825662612915, "loss_mse": 0.0024928897619247437, "lr": 0.00035669123631816525, "grad_norm": 0.12713663280010223, "wall_ms": 764703}
{"step": 55300, "loss": 0.24604010581970215, "loss_nce": 0.24578897655010223, "loss_mse": 0.0025113257579505444, "lr": 0.00035622103924944234, "grad_norm": 0.1190253272652626, "wall_ms": 765946}
{"step": 55400, "loss": 0.287160187959671, "loss_nce": 0.2869136929512024, "loss_mse": 0.0024649149272590876, "lr": 0.0003557503921959239, "grad_norm": 0.14696992933750153, "wall_ms": 767187}
{"step": 55500, "loss": 0.2013181447982788, "loss_nce": 0.2010727822780609, "loss_mse": 0.0024535765405744314, "lr": 0.0003552792972499025, "grad_norm": 0.11573873460292816, "wall_ms": 768431}
{"step": 55600, "loss": 0.2581869065761566, "loss_nce": 0.2579342722892761, "loss_mse": 0.002526251133531332, "lr": 0.0003548077565056618, "grad_norm": 0.13371221721172333, "wall_ms": 769671}
{"step": 55700, "loss": 0.276998907327652, "loss_nce": 0.27674999833106995, "loss_mse": 0.0024889649357646704, "lr": 0.000354335772059467, "grad_norm": 0.1347120702266693, "wall_ms": 770914}
{"step": 55800, "loss": 0.26084592938423157, "loss_nce": 0.26059216260910034, "loss_mse": 0.0025376419071108103, "lr": 0.0003538633460095563, "grad_norm": 0.12514550983905792, "wall_ms": 772154}
{"step": 55900, "loss": 0.27072691917419434, "loss_nce": 0.27047616243362427, "loss_mse": 0.0025075499434024096, "lr": 0.0003533904804561307, "grad_norm": 0.13401752710342407, "wall_ms": 773395}
{"step": 56000, "loss": 0.32609257102012634, "loss_nce": 0.32583755254745483, "loss_mse": 0.002550037344917655, "lr": 0.0003529171775013451, "grad_norm": 0.14907363057136536, "wall_ms": 774636}
{"step": 56100, "loss": 0.2318301647901535, "loss_nce": 0.23158037662506104, "loss_mse": 0.002497877227142453, "lr": 0.0003524434392492992, "grad_norm": 0.11352091282606125, "wall_ms": 775877}
{"step": 56200, "loss": 0.2503156065940857, "loss_nce": 0.2500721514225006, "loss_mse": 0.002434407826513052, "lr": 0.0003519692678060273, "grad_norm": 0.12530238926410675, "wall_ms": 777118}
{"step": 56300, "loss": 0.2520536184310913, "loss_nce": 0.25180184841156006, "loss_mse": 0.0025175791233778, "lr": 0.0003514946652794899, "grad_norm": 0.13297928869724274, "wall_ms": 778367}
{"step": 56400, "loss": 0.27748605608940125, "loss_nce": 0.2772344946861267, "loss_mse": 0.002515467582270503, "lr": 0.0003510196337795637, "grad_norm": 0.13992245495319366, "wall_ms": 779609}
{"step": 56500, "loss": 0.22041095793247223, "loss_nce": 0.22015921771526337, "loss_mse": 0.002517461543902755, "lr": 0.0003505441754180327, "grad_norm": 0.12104897946119308, "wall_ms": 780868}
{"step": 56600, "loss": 0.23348833620548248, "loss_nce": 0.23323160409927368, "loss_mse": 0.0025673215277493, "lr": 0.00035006829230857815, "grad_norm": 0.1309959888458252, "wall_ms": 782143}
{"step": 56700, "loss": 0.24518366158008575, "loss_nce": 0.24493178725242615, "loss_mse": 0.002518712542951107, "lr": 0.00034959198656676996, "grad_norm": 0.13444675505161285, "wall_ms": 783399}
{"step": 56800, "loss": 0.2915179431438446, "loss_nce": 0.291268527507782, "loss_mse": 0.0024940872099250555, "lr": 0.0003491152603100565, "grad_norm": 0.13220389187335968, "wall_ms": 784643}
{"step": 56900, "loss": 0.2002030611038208, "loss_nce": 0.19995450973510742, "loss_mse": 0.002485486678779125, "lr": 0.0003486381156577558, "grad_norm": 0.11723731458187103, "wall_ms": 785885}
{"step": 57000, "loss": 0.2732408940792084, "loss_nce": 0.2729862928390503, "loss_mse": 0.002545948140323162, "lr": 0.00034816055473104574, "grad_norm": 0.1398402601480484, "wall_ms": 787136}
{"step": 57100, "loss": 0.25968044996261597, "loss_nce": 0.25942566990852356, "loss_mse": 0.002547797281295061, "lr": 0.00034768257965295507, "grad_norm": 0.13898834586143494, "wall_ms": 788409}
{"step": 57200, "loss": 0.22636546194553375, "loss_nce": 0.226114422082901, "loss_mse": 0.0025103744119405746, "lr": 0.00034720419254835334, "grad_norm": 0.12121252715587616, "wall_ms": 789659}
{"step": 57300, "loss": 0.25667309761047363, "loss_nce": 0.256428062915802, "loss_mse": 0.0024502654559910297, "lr": 0.0003467253955439418, "grad_norm": 0.13320916891098022, "wall_ms": 790918}
{"step": 57400, "loss": 0.2228981852531433, "loss_nce": 0.22265055775642395, "loss_mse": 0.0024762589018791914, "lr": 0.00034624619076824433, "grad_norm": 0.11587473005056381, "wall_ms": 792164}
{"step": 57500, "loss": 0.24741803109645844, "loss_nce": 0.2471659928560257, "loss_mse": 0.0025203432887792587, "lr": 0.0003457665803515972, "grad_norm": 0.12456914782524109, "wall_ms": 793414}
{"step": 57600, "loss": 0.29054534435272217, "loss_nce": 0.2902945876121521, "loss_mse": 0.002507556928321719, "lr": 0.00034528656642614003, "grad_norm": 0.1355145275592804, "wall_ms": 794659}
{"step": 57700, "loss": 0.2520560920238495, "loss_nce": 0.25180113315582275, "loss_mse": 0.0025495279114693403, "lr": 0.0003448061511258065, "grad_norm": 0.13160207867622375, "wall_ms": 795902}
{"step": 57800, "loss": 0.27609968185424805, "loss_nce": 0.2758498787879944, "loss_mse": 0.0024981272872537374, "lr": 0.0003443253365863142, "grad_norm": 0.14354237914085388, "wall_ms": 797141}
{"step": 57900, "loss": 0.232334166765213, "loss_nce": 0.2320842444896698, "loss_mse": 0.002499232767149806, "lr": 0.0003438441249451561, "grad_norm": 0.12218460440635681, "wall_ms": 798380}
{"step": 58000, "loss": 0.23511946201324463, "loss_nce": 0.2348679006099701, "loss_mse": 0.0025156443007290363, "lr": 0.00034336251834159013, "grad_norm": 0.12602268159389496, "wall_ms": 799624}
{"step": 58100, "loss": 0.2807289958000183, "loss_nce": 0.2804727554321289, "loss_mse": 0.0025625284761190414, "lr": 0.00034288051891663015, "grad_norm": 0.125089630484581, "wall_ms": 800870}
{"step": 58200, "loss": 0.24405130743980408, "loss_nce": 0.24379907548427582, "loss_mse": 0.002522376598790288, "lr": 0.0003423981288130363, "grad_norm": 0.12624502182006836, "wall_ms": 802114}
{"step": 58300, "loss": 0.2439626008272171, "loss_nce": 0.2437151074409485, "loss_mse": 0.002474915236234665, "lr": 0.0003419153501753055, "grad_norm": 0.12629368901252747, "wall_ms": 803359}
{"step": 58400, "loss": 0.26887384057044983, "loss_nce": 0.26862287521362305, "loss_mse": 0.0025097422767430544, "lr": 0.0003414321851496622, "grad_norm": 0.13578666746616364, "wall_ms": 804608}
{"step": 58500, "loss": 0.2626844048500061, "loss_nce": 0.26243722438812256, "loss_mse": 0.0024718972854316235, "lr": 0.00034094863588404826, "grad_norm": 0.1366676390171051, "wall_ms": 805864}
{"step": 58600, "loss": 0.24888142943382263, "loss_nce": 0.24863214790821075, "loss_mse": 0.002492810832336545, "lr": 0.0003404647045281137, "grad_norm": 0.1242728978395462, "wall_ms": 807119}
{"step": 58700, "loss": 0.26697611808776855, "loss_nce": 0.26672983169555664, "loss_mse": 0.0024627172388136387, "lr": 0.0003399803932332072, "grad_norm": 0.1412188708782196, "wall_ms": 808367}
{"step": 58800, "loss": 0.24680574238300323, "loss_nce": 0.24655193090438843, "loss_mse": 0.002538180910050869, "lr": 0.00033949570415236656, "grad_norm": 0.12901687622070312, "wall_ms": 809619}
{"step": 58900, "loss": 0.26421886682510376, "loss_nce": 0.2639719247817993, "loss_mse": 0.0024695000611245632, "lr": 0.00033901063944030913, "grad_norm": 0.12713515758514404, "wall_ms": 810880}
{"step": 59000, "loss": 0.28796419501304626, "loss_nce": 0.2877146005630493, "loss_mse": 0.002495800843462348, "lr": 0.00033852520125342186, "grad_norm": 0.14064595103263855, "wall_ms": 812139}
{"step": 59100, "loss": 0.2365429401397705, "loss_nce": 0.2362961322069168, "loss_mse": 0.0024681338109076023, "lr": 0.0003380393917497523, "grad_norm": 0.11579785495996475, "wall_ms": 813412}
{"step": 59200, "loss": 0.25319069623947144, "loss_nce": 0.2529395520687103, "loss_mse": 0.0025115092284977436, "lr": 0.00033755321308899836, "grad_norm": 0.12974585592746735, "wall_ms": 814702}
{"step": 59300, "loss": 0.2498035430908203, "loss_nce": 0.24955040216445923, "loss_mse": 0.002531378297135234, "lr": 0.00033706666743249964, "grad_norm": 0.1281924545764923, "wall_ms": 815980}
{"step": 59400, "loss": 0.2592511773109436, "loss_nce": 0.2589987814426422, "loss_mse": 0.0025239111855626106, "lr": 0.0003365797569432267, "grad_norm": 0.1361384093761444, "wall_ms": 817235}
{"step": 59500, "loss": 0.2359386831521988, "loss_nce": 0.23569053411483765, "loss_mse": 0.0024815509095788, "lr": 0.00033609248378577215, "grad_norm": 0.11536309868097305, "wall_ms": 818475}
{"step": 59600, "loss": 0.25104865431785583, "loss_nce": 0.25080305337905884, "loss_mse": 0.0024560352321714163, "lr": 0.000335604850126341, "grad_norm": 0.13000141084194183, "wall_ms": 819737}
{"step": 59700, "loss": 0.23175904154777527, "loss_nce": 0.23151126503944397, "loss_mse": 0.0024777452927082777, "lr": 0.00033511685813274064, "grad_norm": 0.12267681956291199, "wall_ms": 821016}
{"step": 59800, "loss": 0.28001630306243896, "loss_nce": 0.27976810932159424, "loss_mse": 0.0024818871170282364, "lr": 0.00033462850997437177, "grad_norm": 0.13621653616428375, "wall_ms": 822288}
{"step": 59900, "loss": 0.22716711461544037, "loss_nce": 0.22692173719406128, "loss_mse": 0.002453804947435856, "lr": 0.0003341398078222182, "grad_norm": 0.12020821124315262, "wall_ms": 823577}
{"step": 60000, "loss": 0.26190295815467834, "loss_nce": 0.26165151596069336, "loss_mse": 0.002514452673494816, "lr": 0.00033365075384883763, "grad_norm": 0.13621766865253448, "wall_ms": 824874}
{"step": 60100, "loss": 0.26345184445381165, "loss_nce": 0.2632012367248535, "loss_mse": 0.0025061341002583504, "lr": 0.0003331613502283515, "grad_norm": 0.14194424450397491, "wall_ms": 840618}
{"step": 60200, "loss": 0.27894940972328186, "loss_nce": 0.278702974319458, "loss_mse": 0.002464243210852146, "lr": 0.0003326715991364361, "grad_norm": 0.176924929022789, "wall_ms": 841909}
{"step": 60300, "loss": 0.28373363614082336, "loss_nce": 0.2834838628768921, "loss_mse": 0.002497742883861065, "lr": 0.000332181502750312, "grad_norm": 0.12938718497753143, "wall_ms": 843170}
{"step": 60400, "loss": 0.2887727916240692, "loss_nce": 0.28852158784866333, "loss_mse": 0.0025120771024376154, "lr": 0.000331691063248735, "grad_norm": 0.12965400516986847, "wall_ms": 844448}
{"step": 60500, "loss": 0.2470245361328125, "loss_nce": 0.24677754938602448, "loss_mse": 0.002469850704073906, "lr": 0.0003312002828119861, "grad_norm": 0.12159138172864914, "wall_ms": 845724}
{"step": 60600, "loss": 0.23638342320919037, "loss_nce": 0.23613318800926208, "loss_mse": 0.002502365503460169, "lr": 0.00033070916362186215, "grad_norm": 0.12532587349414825, "wall_ms": 847008}
{"step": 60700, "loss": 0.24061158299446106, "loss_nce": 0.24036365747451782, "loss_mse": 0.0024791997857391834, "lr": 0.0003302177078616658, "grad_norm": 0.13879722356796265, "wall_ms": 848285}
{"step": 60800, "loss": 0.2650456130504608, "loss_nce": 0.26479607820510864, "loss_mse": 0.002495307009667158, "lr": 0.00032972591771619606, "grad_norm": 0.12627862393856049, "wall_ms": 849574}
{"step": 60900, "loss": 0.2331276535987854, "loss_nce": 0.23288235068321228, "loss_mse": 0.002452985616400838, "lr": 0.0003292337953717385, "grad_norm": 0.12512639164924622, "wall_ms": 850818}
{"step": 61000, "loss": 0.256451278924942, "loss_nce": 0.25620269775390625, "loss_mse": 0.002485942794010043, "lr": 0.00032874134301605514, "grad_norm": 0.12776798009872437, "wall_ms": 852074}
{"step": 61100, "loss": 0.2569209933280945, "loss_nce": 0.25667425990104675, "loss_mse": 0.002467378042638302, "lr": 0.00032824856283837547, "grad_norm": 0.12507624924182892, "wall_ms": 853322}
{"step": 61200, "loss": 0.2526528835296631, "loss_nce": 0.2524026334285736, "loss_mse": 0.002502450253814459, "lr": 0.0003277554570293863, "grad_norm": 0.12988881766796112, "wall_ms": 854563}
{"step": 61300, "loss": 0.2119378000497818, "loss_nce": 0.21169236302375793, "loss_mse": 0.002454300643876195, "lr": 0.0003272620277812219, "grad_norm": 0.11589878052473068, "wall_ms": 855806}
{"step": 61400, "loss": 0.239052414894104, "loss_nce": 0.23880179226398468, "loss_mse": 0.0025061974301934242, "lr": 0.0003267682772874546, "grad_norm": 0.1300705075263977, "wall_ms": 857043}
{"step": 61500, "loss": 0.25878897309303284, "loss_nce": 0.25854089856147766, "loss_mse": 0.002480862895026803, "lr": 0.00032627420774308455, "grad_norm": 0.1291850060224533, "wall_ms": 858288}
{"step": 61600, "loss": 0.2593485713005066, "loss_nce": 0.25909703969955444, "loss_mse": 0.002515192376449704, "lr": 0.0003257798213445304, "grad_norm": 0.12454751878976822, "wall_ms": 859526}
{"step": 61700, "loss": 0.2603921592235565, "loss_nce": 0.26013851165771484, "loss_mse": 0.002536483807489276, "lr": 0.00032528512028961976, "grad_norm": 0.12818636000156403, "wall_ms": 860794}
{"step": 61800, "loss": 0.21297374367713928, "loss_nce": 0.2127293050289154, "loss_mse": 0.0024443170987069607, "lr": 0.00032479010677757857, "grad_norm": 0.11217261850833893, "wall_ms": 862053}
{"step": 61900, "loss": 0.2268630415201187, "loss_nce": 0.2266179621219635, "loss_mse": 0.0024508654605597258, "lr": 0.0003242947830090219, "grad_norm": 0.12098300457000732, "wall_ms": 863305}
{"step": 62000, "loss": 0.23043963313102722, "loss_nce": 0.2301841527223587, "loss_mse": 0.002554855076596141, "lr": 0.0003237991511859443, "grad_norm": 0.12969593703746796, "wall_ms": 864549}
{"step": 62100, "loss": 0.2754151225090027, "loss_nce": 0.2751635015010834, "loss_mse": 0.0025162575766444206, "lr": 0.00032330321351170966, "grad_norm": 0.1356205940246582, "wall_ms": 865799}
{"step": 62200, "loss": 0.2523130476474762, "loss_nce": 0.2520652413368225, "loss_mse": 0.0024779713712632656, "lr": 0.00032280697219104156, "grad_norm": 0.12718117237091064, "wall_ms": 867050}
{"step": 62300, "loss": 0.2666568160057068, "loss_nce": 0.26640355587005615, "loss_mse": 0.002532616024836898, "lr": 0.0003223104294300135, "grad_norm": 0.130887970328331, "wall_ms": 868302}
{"step": 62400, "loss": 0.24463734030723572, "loss_nce": 0.24438980221748352, "loss_mse": 0.00247534760273993, "lr": 0.0003218135874360388, "grad_norm": 0.13210782408714294, "wall_ms": 869554}
{"step": 62500, "loss": 0.2758709490299225, "loss_nce": 0.27561938762664795, "loss_mse": 0.0025157316122204065, "lr": 0.0003213164484178616, "grad_norm": 0.14142130315303802, "wall_ms": 870804}
{"step": 62600, "loss": 0.2508265972137451, "loss_nce": 0.2505701780319214, "loss_mse": 0.0025643296539783478, "lr": 0.0003208190145855459, "grad_norm": 0.13869228959083557, "wall_ms": 872051}
{"step": 62700, "loss": 0.22636300325393677, "loss_nce": 0.22611376643180847, "loss_mse": 0.0024924222379922867, "lr": 0.00032032128815046674, "grad_norm": 0.12860162556171417, "wall_ms": 873301}
{"step": 62800, "loss": 0.25075000524520874, "loss_nce": 0.25049933791160583, "loss_mse": 0.0025065799709409475, "lr": 0.0003198232713252996, "grad_norm": 0.1279093325138092, "wall_ms": 874550}
{"step": 62900, "loss": 0.25229763984680176, "loss_nce": 0.25204986333847046, "loss_mse": 0.002477689180523157, "lr": 0.0003193249663240111, "grad_norm": 0.12871681153774261, "wall_ms": 875795}
{"step": 63000, "loss": 0.2762966752052307, "loss_nce": 0.2760404348373413, "loss_mse": 0.002562494482845068, "lr": 0.00031882637536184905, "grad_norm": 0.1406698375940323, "wall_ms": 877042}
{"step": 63100, "loss": 0.2391074150800705, "loss_nce": 0.23885947465896606, "loss_mse": 0.002479329938068986, "lr": 0.00031832750065533227, "grad_norm": 0.12203925102949142, "wall_ms": 878291}
{"step": 63200, "loss": 0.2377629280090332, "loss_nce": 0.2375156283378601, "loss_mse": 0.0024729736614972353, "lr": 0.00031782834442224125, "grad_norm": 0.11425679922103882, "wall_ms": 879544}
{"step": 63300, "loss": 0.19770361483097076, "loss_nce": 0.1974555104970932, "loss_mse": 0.0024810528848320246, "lr": 0.00031732890888160775, "grad_norm": 0.118019238114357, "wall_ms": 880790}
{"step": 63400, "loss": 0.22077740728855133, "loss_nce": 0.22052821516990662, "loss_mse": 0.0024919777642935514, "lr": 0.0003168291962537054, "grad_norm": 0.12432631850242615, "wall_ms": 882036}
{"step": 63500, "loss": 0.2629060447216034, "loss_nce": 0.2626590430736542, "loss_mse": 0.0024699170608073473, "lr": 0.00031632920876003976, "grad_norm": 0.13096429407596588, "wall_ms": 883277}
{"step": 63600, "loss": 0.2539936900138855, "loss_nce": 0.25374311208724976, "loss_mse": 0.002505882875993848, "lr": 0.0003158289486233379, "grad_norm": 0.13827082514762878, "wall_ms": 884518}
{"step": 63700, "loss": 0.2914639413356781, "loss_nce": 0.29120755195617676, "loss_mse": 0.0025639566592872143, "lr": 0.0003153284180675394, "grad_norm": 0.1401515156030655, "wall_ms": 885765}
{"step": 63800, "loss": 0.30544811487197876, "loss_nce": 0.3052016794681549, "loss_mse": 0.0024643426295369864, "lr": 0.00031482761931778564, "grad_norm": 0.1523587703704834, "wall_ms": 887006}
{"step": 63900, "loss": 0.24755434691905975, "loss_nce": 0.24730165302753448, "loss_mse": 0.0025269787292927504, "lr": 0.00031432655460041054, "grad_norm": 0.12826302647590637, "wall_ms": 888254}
{"step": 64000, "loss": 0.2575652003288269, "loss_nce": 0.2573164701461792, "loss_mse": 0.0024872024077922106, "lr": 0.0003138252261429303, "grad_norm": 0.1275625377893448, "wall_ms": 889495}
{"step": 64100, "loss": 0.28464818000793457, "loss_nce": 0.2843971848487854, "loss_mse": 0.0025099029298871756, "lr": 0.00031332363617403355, "grad_norm": 0.13868248462677002, "wall_ms": 890738}
{"step": 64200, "loss": 0.24893556535243988, "loss_nce": 0.24868711829185486, "loss_mse": 0.002484508790075779, "lr": 0.00031282178692357157, "grad_norm": 0.12766285240650177, "wall_ms": 891980}
{"step": 64300, "loss": 0.27243664860725403, "loss_nce": 0.27218297123908997, "loss_mse": 0.0025366423651576042, "lr": 0.00031231968062254815, "grad_norm": 0.1370019167661667, "wall_ms": 893221}
{"step": 64400, "loss": 0.25663667917251587, "loss_nce": 0.256387859582901, "loss_mse": 0.0024883130099624395, "lr": 0.00031181731950311004, "grad_norm": 0.1383790224790573, "wall_ms": 894462}
{"step": 64500, "loss": 0.2640295624732971, "loss_nce": 0.2637835144996643, "loss_mse": 0.002460543531924486, "lr": 0.00031131470579853673, "grad_norm": 0.12220215052366257, "wall_ms": 895712}
{"step": 64600, "loss": 0.2497994303703308, "loss_nce": 0.24954524636268616, "loss_mse": 0.002541826805099845, "lr": 0.00031081184174323027, "grad_norm": 0.12138888239860535, "wall_ms": 896961}
{"step": 64700, "loss": 0.2516889274120331, "loss_nce": 0.25143736600875854, "loss_mse": 0.0025156529154628515, "lr": 0.0003103087295727062, "grad_norm": 0.13409586250782013, "wall_ms": 898201}
{"step": 64800, "loss": 0.24546097218990326, "loss_nce": 0.24520912766456604, "loss_mse": 0.0025184552650898695, "lr": 0.0003098053715235827, "grad_norm": 0.13544198870658875, "wall_ms": 899440}
{"step": 64900, "loss": 0.2566002309322357, "loss_nce": 0.2563474774360657, "loss_mse": 0.002527511678636074, "lr": 0.0003093017698335712, "grad_norm": 0.1423761546611786, "wall_ms": 900681}
{"step": 65000, "loss": 0.26891380548477173, "loss_nce": 0.2686693072319031, "loss_mse": 0.002445011166855693, "lr": 0.0003087979267414659, "grad_norm": 0.13280674815177917, "wall_ms": 901930}
{"step": 65100, "loss": 0.27225223183631897, "loss_nce": 0.2720065712928772, "loss_mse": 0.00245663826353848, "lr": 0.0003082938444871348, "grad_norm": 0.13589929044246674, "wall_ms": 903174}
{"step": 65200, "loss": 0.27436190843582153, "loss_nce": 0.27411240339279175, "loss_mse": 0.00249491143040359, "lr": 0.00030778952531150864, "grad_norm": 0.12770682573318481, "wall_ms": 904415}
{"step": 65300, "loss": 0.24796926975250244, "loss_nce": 0.2477249652147293, "loss_mse": 0.0024430835619568825, "lr": 0.00030728497145657134, "grad_norm": 0.1275474727153778, "wall_ms": 905661}
{"step": 65400, "loss": 0.28058281540870667, "loss_nce": 0.2803296148777008, "loss_mse": 0.002532114740461111, "lr": 0.00030678018516535054, "grad_norm": 0.13377262651920319, "wall_ms": 906909}
{"step": 65500, "loss": 0.21808768808841705, "loss_nce": 0.21784532070159912, "loss_mse": 0.002423670841380954, "lr": 0.0003062751686819069, "grad_norm": 0.11651068180799484, "wall_ms": 908170}
{"step": 65600, "loss": 0.2521814703941345, "loss_nce": 0.25193458795547485, "loss_mse": 0.002468914957717061, "lr": 0.0003057699242513243, "grad_norm": 0.13237260282039642, "wall_ms": 909432}
{"step": 65700, "loss": 0.23746861517429352, "loss_nce": 0.23722252249717712, "loss_mse": 0.0024608715903013945, "lr": 0.00030526445411970015, "grad_norm": 0.11988173425197601, "wall_ms": 910673}
{"step": 65800, "loss": 0.2682923376560211, "loss_nce": 0.2680429220199585, "loss_mse": 0.0024941074661910534, "lr": 0.0003047587605341353, "grad_norm": 0.13374729454517365, "wall_ms": 911923}
{"step": 65900, "loss": 0.2530883550643921, "loss_nce": 0.252836138010025, "loss_mse": 0.0025222087278962135, "lr": 0.00030425284574272393, "grad_norm": 0.13868941366672516, "wall_ms": 913173}
{"step": 66000, "loss": 0.22263477742671967, "loss_nce": 0.22238078713417053, "loss_mse": 0.002539960201829672, "lr": 0.00030374671199454343, "grad_norm": 0.11908241361379623, "wall_ms": 914423}
{"step": 66100, "loss": 0.2747240662574768, "loss_nce": 0.2744706869125366, "loss_mse": 0.00253384280949831, "lr": 0.00030324036153964477, "grad_norm": 0.1353820413351059, "wall_ms": 915668}
{"step": 66200, "loss": 0.2757011353969574, "loss_nce": 0.27544867992401123, "loss_mse": 0.0025246136356145144, "lr": 0.00030273379662904226, "grad_norm": 0.13453267514705658, "wall_ms": 916910}
{"step": 66300, "loss": 0.2595385015010834, "loss_nce": 0.2592884302139282, "loss_mse": 0.0025006739888340235, "lr": 0.00030222701951470357, "grad_norm": 0.13479475677013397, "wall_ms": 918153}
{"step": 66400, "loss": 0.25796687602996826, "loss_nce": 0.2577165961265564, "loss_mse": 0.0025027201045304537, "lr": 0.00030172003244953964, "grad_norm": 0.12354344874620438, "wall_ms": 919420}
{"step": 66500, "loss": 0.23373886942863464, "loss_nce": 0.233494371175766, "loss_mse": 0.002445024671033025, "lr": 0.000301212837687395, "grad_norm": 0.1349814385175705, "wall_ms": 920679}
{"step": 66600, "loss": 0.2586840093135834, "loss_nce": 0.2584380805492401, "loss_mse": 0.0024591409601271152, "lr": 0.0003007054374830371, "grad_norm": 0.13150863349437714, "wall_ms": 921943}
{"step": 66700, "loss": 0.23380811512470245, "loss_nce": 0.23356199264526367, "loss_mse": 0.00246121222153306, "lr": 0.0003001978340921472, "grad_norm": 0.11812502145767212, "wall_ms": 923199}
{"step": 66800, "loss": 0.25403374433517456, "loss_nce": 0.2537802457809448, "loss_mse": 0.0025348374620079994, "lr": 0.0002996900297713097, "grad_norm": 0.1326388269662857, "wall_ms": 924454}
{"step": 66900, "loss": 0.2426663637161255, "loss_nce": 0.24241754412651062, "loss_mse": 0.0024881609715521336, "lr": 0.0002991820267780019, "grad_norm": 0.11260880529880524, "wall_ms": 925697}
{"step": 67000, "loss": 0.26545804738998413, "loss_nce": 0.2652042508125305, "loss_mse": 0.0025380116421729326, "lr": 0.0002986738273705846, "grad_norm": 0.1374060958623886, "wall_ms": 926937}
{"step": 67100, "loss": 0.24723787605762482, "loss_nce": 0.2469867765903473, "loss_mse": 0.0025110647547990084, "lr": 0.0002981654338082918, "grad_norm": 0.14016862213611603, "wall_ms": 928180}
{"step": 67200, "loss": 0.2702682912349701, "loss_nce": 0.27001917362213135, "loss_mse": 0.002491108374670148, "lr": 0.0002976568483512206, "grad_norm": 0.13006015121936798, "wall_ms": 929419}
{"step": 67300, "loss": 0.2654067873954773, "loss_nce": 0.2651590406894684, "loss_mse": 0.0024774258490651846, "lr": 0.000297148073260321, "grad_norm": 0.1390482783317566, "wall_ms": 930658}
{"step": 67400, "loss": 0.24624182283878326, "loss_nce": 0.24599656462669373, "loss_mse": 0.0024526012130081654, "lr": 0.00029663911079738626, "grad_norm": 0.12871433794498444, "wall_ms": 931896}
{"step": 67500, "loss": 0.24631084501743317, "loss_nce": 0.24605891108512878, "loss_mse": 0.0025193511974066496, "lr": 0.00029612996322504254, "grad_norm": 0.11859950423240662, "wall_ms": 933135}
{"step": 67600, "loss": 0.25436413288116455, "loss_nce": 0.2541120648384094, "loss_mse": 0.0025206010323017836, "lr": 0.00029562063280673883, "grad_norm": 0.12905561923980713, "wall_ms": 934373}
{"step": 67700, "loss": 0.30059295892715454, "loss_nce": 0.30034178495407104, "loss_mse": 0.0025117904879152775, "lr": 0.00029511112180673714, "grad_norm": 0.14248663187026978, "wall_ms": 935611}
{"step": 67800, "loss": 0.24222275614738464, "loss_nce": 0.24197548627853394, "loss_mse": 0.0024726777337491512, "lr": 0.00029460143249010213, "grad_norm": 0.1358838677406311, "wall_ms": 936849}
{"step": 67900, "loss": 0.27134570479393005, "loss_nce": 0.27109983563423157, "loss_mse": 0.0024587400257587433, "lr": 0.0002940915671226912, "grad_norm": 0.1382778137922287, "wall_ms": 938087}
{"step": 68000, "loss": 0.2330448031425476, "loss_nce": 0.23279526829719543, "loss_mse": 0.002495348220691085, "lr": 0.0002935815279711444, "grad_norm": 0.12678469717502594, "wall_ms": 939328}
{"step": 68100, "loss": 0.2676023244857788, "loss_nce": 0.26735252141952515, "loss_mse": 0.0024980083107948303, "lr": 0.0002930713173028744, "grad_norm": 0.13927754759788513, "wall_ms": 940569}
{"step": 68200, "loss": 0.25010162591934204, "loss_nce": 0.24985463917255402, "loss_mse": 0.0024699862115085125, "lr": 0.00029256093738605634, "grad_norm": 0.1272761970758438, "wall_ms": 941809}
{"step": 68300, "loss": 0.2814752757549286, "loss_nce": 0.28122377395629883, "loss_mse": 0.0025149937719106674, "lr": 0.0002920503904896176, "grad_norm": 0.13764382898807526, "wall_ms": 943054}
{"step": 68400, "loss": 0.23981180787086487, "loss_nce": 0.2395622432231903, "loss_mse": 0.002495627384632826, "lr": 0.0002915396788832282, "grad_norm": 0.13119596242904663, "wall_ms": 944304}
{"step": 68500, "loss": 0.2588077783584595, "loss_nce": 0.2585529386997223, "loss_mse": 0.002548492280766368, "lr": 0.00029102880483729027, "grad_norm": 0.13792236149311066, "wall_ms": 945570}
{"step": 68600, "loss": 0.2375674992799759, "loss_nce": 0.23731476068496704, "loss_mse": 0.0025273533537983894, "lr": 0.0002905177706229279, "grad_norm": 0.11327836662530899, "wall_ms": 946835}
{"step": 68700, "loss": 0.30345118045806885, "loss_nce": 0.30319589376449585, "loss_mse": 0.0025528455153107643, "lr": 0.00029000657851197735, "grad_norm": 0.14735430479049683, "wall_ms": 948099}
{"step": 68800, "loss": 0.20762895047664642, "loss_nce": 0.20738449692726135, "loss_mse": 0.0024445271119475365, "lr": 0.00028949523077697676, "grad_norm": 0.11961120367050171, "wall_ms": 949359}
{"step": 68900, "loss": 0.26109927892684937, "loss_nce": 0.2608504891395569, "loss_mse": 0.0024878180120140314, "lr": 0.0002889837296911563, "grad_norm": 0.12464818358421326, "wall_ms": 950619}
{"step": 69000, "loss": 0.2750813663005829, "loss_nce": 0.2748258709907532, "loss_mse": 0.0025550273712724447, "lr": 0.0002884720775284276, "grad_norm": 0.14449095726013184, "wall_ms": 951884}
{"step": 69100, "loss": 0.25765860080718994, "loss_nce": 0.2574056386947632, "loss_mse": 0.0025295759551227093, "lr": 0.00028796027656337417, "grad_norm": 0.13986732065677643, "wall_ms": 953149}
{"step": 69200, "loss": 0.2591719925403595, "loss_nce": 0.25892549753189087, "loss_mse": 0.0024648611433804035, "lr": 0.0002874483290712406, "grad_norm": 0.13668721914291382, "wall_ms": 954404}
{"step": 69300, "loss": 0.23435528576374054, "loss_nce": 0.23410746455192566, "loss_mse": 0.002478240290656686, "lr": 0.00028693623732792354, "grad_norm": 0.1365923136472702, "wall_ms": 955664}
{"step": 69400, "loss": 0.2460673749446869, "loss_nce": 0.24581637978553772, "loss_mse": 0.002509969985112548, "lr": 0.0002864240036099605, "grad_norm": 0.12617817521095276, "wall_ms": 956926}
{"step": 69500, "loss": 0.25384101271629333, "loss_nce": 0.25359028577804565, "loss_mse": 0.0025071711279451847, "lr": 0.0002859116301945201, "grad_norm": 0.1323191225528717, "wall_ms": 958180}
{"step": 69600, "loss": 0.2699401080608368, "loss_nce": 0.26968932151794434, "loss_mse": 0.0025080072227865458, "lr": 0.0002853991193593921, "grad_norm": 0.13900521397590637, "wall_ms": 959439}
{"step": 69700, "loss": 0.24919380247592926, "loss_nce": 0.24894419312477112, "loss_mse": 0.0024960809387266636, "lr": 0.0002848864733829772, "grad_norm": 0.12671104073524475, "wall_ms": 960686}
{"step": 69800, "loss": 0.23502421379089355, "loss_nce": 0.23477746546268463, "loss_mse": 0.0024675398599356413, "lr": 0.0002843736945442768, "grad_norm": 0.12574359774589539, "wall_ms": 961930}
{"step": 69900, "loss": 0.270855575799942, "loss_nce": 0.270602285861969, "loss_mse": 0.0025329329073429108, "lr": 0.00028386078512288303, "grad_norm": 0.13553054630756378, "wall_ms": 963172}
{"step": 70000, "loss": 0.24495624005794525, "loss_nce": 0.24469971656799316, "loss_mse": 0.0025652770418673754, "lr": 0.00028334774739896854, "grad_norm": 0.12352975457906723, "wall_ms": 964414}
{"step": 70100, "loss": 0.23864418268203735, "loss_nce": 0.2383916974067688, "loss_mse": 0.002524815034121275, "lr": 0.00028283458365327625, "grad_norm": 0.1267652064561844, "wall_ms": 978377}
{"step": 70200, "loss": 0.26716795563697815, "loss_nce": 0.2669166326522827, "loss_mse": 0.0025131104048341513, "lr": 0.00028232129616710943, "grad_norm": 0.1291598230600357, "wall_ms": 979617}
{"step": 70300, "loss": 0.2655031085014343, "loss_nce": 0.26525115966796875, "loss_mse": 0.0025196305941790342, "lr": 0.0002818078872223213, "grad_norm": 0.1303791105747223, "wall_ms": 980854}
{"step": 70400, "loss": 0.25890642404556274, "loss_nce": 0.25866109132766724, "loss_mse": 0.002453281544148922, "lr": 0.0002812943591013053, "grad_norm": 0.11856551468372345, "wall_ms": 982442}
{"step": 70500, "loss": 0.2740461230278015, "loss_nce": 0.2737939953804016, "loss_mse": 0.002521311165764928, "lr": 0.00028078071408698443, "grad_norm": 0.12965522706508636, "wall_ms": 983711}
{"step": 70600, "loss": 0.24012601375579834, "loss_nce": 0.23987612128257751, "loss_mse": 0.002498963847756386, "lr": 0.0002802669544628014, "grad_norm": 0.12342159450054169, "wall_ms": 984981}
{"step": 70700, "loss": 0.22465594112873077, "loss_nce": 0.22440673410892487, "loss_mse": 0.0024920960422605276, "lr": 0.0002797530825127083, "grad_norm": 0.12263680249452591, "wall_ms": 986242}
{"step": 70800, "loss": 0.23079638183116913, "loss_nce": 0.23054510354995728, "loss_mse": 0.0025127383414655924, "lr": 0.00027923910052115707, "grad_norm": 0.11513394862413406, "wall_ms": 987509}
{"step": 70900, "loss": 0.24609830975532532, "loss_nce": 0.24584849178791046, "loss_mse": 0.0024981133174151182, "lr": 0.00027872501077308827, "grad_norm": 0.126046285033226, "wall_ms": 988795}
{"step": 71000, "loss": 0.2013036012649536, "loss_nce": 0.20106224715709686, "loss_mse": 0.002413578098639846, "lr": 0.00027821081555392174, "grad_norm": 0.1137230172753334, "wall_ms": 990045}
{"step": 71100, "loss": 0.23190800845623016, "loss_nce": 0.23166395723819733, "loss_mse": 0.002440568758174777, "lr": 0.0002776965171495463, "grad_norm": 0.12168353796005249, "wall_ms": 991288}
{"step": 71200, "loss": 0.2758890986442566, "loss_nce": 0.27564072608947754, "loss_mse": 0.0024836661759763956, "lr": 0.0002771821178463092, "grad_norm": 0.13299894332885742, "wall_ms": 992527}
{"step": 71300, "loss": 0.24353563785552979, "loss_nce": 0.2432871162891388, "loss_mse": 0.002485244069248438, "lr": 0.0002766676199310065, "grad_norm": 0.11723426729440689, "wall_ms": 993770}
{"step": 71400, "loss": 0.2576892673969269, "loss_nce": 0.25744229555130005, "loss_mse": 0.002469736384227872, "lr": 0.00027615302569087267, "grad_norm": 0.12015244364738464, "wall_ms": 995010}
{"step": 71500, "loss": 0.2619011700153351, "loss_nce": 0.26165443658828735, "loss_mse": 0.0024673601146787405, "lr": 0.0002756383374135702, "grad_norm": 0.14161251485347748, "wall_ms": 996259}
{"step": 71600, "loss": 0.23225991427898407, "loss_nce": 0.23200716078281403, "loss_mse": 0.0025275847874581814, "lr": 0.00027512355738717965, "grad_norm": 0.12602023780345917, "wall_ms": 997501}
{"step": 71700, "loss": 0.26334288716316223, "loss_nce": 0.26309120655059814, "loss_mse": 0.0025169197469949722, "lr": 0.00027460868790018963, "grad_norm": 0.14274445176124573, "wall_ms": 998751}
{"step": 71800, "loss": 0.1901589184999466, "loss_nce": 0.18991541862487793, "loss_mse": 0.002434980124235153, "lr": 0.0002740937312414862, "grad_norm": 0.10573408007621765, "wall_ms": 1000002}
{"step": 71900, "loss": 0.21048776805400848, "loss_nce": 0.21023786067962646, "loss_mse": 0.0024991005193442106, "lr": 0.0002735786897003433, "grad_norm": 0.11284808069467545, "wall_ms": 1001245}
{"step": 72000, "loss": 0.22274263203144073, "loss_nce": 0.22249168157577515, "loss_mse": 0.002509532030671835, "lr": 0.0002730635655664119, "grad_norm": 0.11459267139434814, "wall_ms": 1002492}
{"step": 72100, "loss": 0.23170775175094604, "loss_nce": 0.23145842552185059, "loss_mse": 0.002493208972737193, "lr": 0.0002725483611297103, "grad_norm": 0.12082113325595856, "wall_ms": 1003736}
{"step": 72200, "loss": 0.2544861137866974, "loss_nce": 0.25423794984817505, "loss_mse": 0.002481653355062008, "lr": 0.00027203307868061365, "grad_norm": 0.1372571438550949, "wall_ms": 1004985}
{"step": 72300, "loss": 0.2494513988494873, "loss_nce": 0.24920472502708435, "loss_mse": 0.002466693287715316, "lr": 0.0002715177205098442, "grad_norm": 0.13074210286140442, "wall_ms": 1006239}
{"step": 72400, "loss": 0.24878856539726257, "loss_nce": 0.24853122234344482, "loss_mse": 0.0025734235532581806, "lr": 0.0002710022889084605, "grad_norm": 0.1274222582578659, "wall_ms": 1007486}
{"step": 72500, "loss": 0.24958297610282898, "loss_nce": 0.24933424592018127, "loss_mse": 0.002487252466380596, "lr": 0.00027048678616784763, "grad_norm": 0.12268377840518951, "wall_ms": 1008743}
{"step": 72600, "loss": 0.23544122278690338, "loss_nce": 0.2351928949356079, "loss_mse": 0.002483294578269124, "lr": 0.00026997121457970696, "grad_norm": 0.12936198711395264, "wall_ms": 1010001}
{"step": 72700, "loss": 0.2530592679977417, "loss_nce": 0.2528040409088135, "loss_mse": 0.0025522613432258368, "lr": 0.0002694555764360462, "grad_norm": 0.12970928847789764, "wall_ms": 1011265}
{"step": 72800, "loss": 0.20652227103710175, "loss_nce": 0.20627515017986298, "loss_mse": 0.002471273299306631, "lr": 0.0002689398740291684, "grad_norm": 0.11019882559776306, "wall_ms": 1012530}
{"step": 72900, "loss": 0.2750962972640991, "loss_nce": 0.27484533190727234, "loss_mse": 0.0025096030440181494, "lr": 0.0002684241096516627, "grad_norm": 0.14122548699378967, "wall_ms": 1013789}
{"step": 73000, "loss": 0.2628207206726074, "loss_nce": 0.26257067918777466, "loss_mse": 0.0025004197377711535, "lr": 0.0002679082855963935, "grad_norm": 0.12263032048940659, "wall_ms": 1015036}
{"step": 73100, "loss": 0.2586359977722168, "loss_nce": 0.25838178396224976, "loss_mse": 0.00254217186011374, "lr": 0.0002673924041564907, "grad_norm": 0.1418810784816742, "wall_ms": 1016293}
{"step": 73200, "loss": 0.22231565415859222, "loss_nce": 0.2220679670572281, "loss_mse": 0.0024768440052866936, "lr": 0.0002668764676253392, "grad_norm": 0.11631019413471222, "wall_ms": 1017539}
{"step": 73300, "loss": 0.2678847908973694, "loss_nce": 0.2676323652267456, "loss_mse": 0.002524273470044136, "lr": 0.0002663604782965688, "grad_norm": 0.12856805324554443, "wall_ms": 1018782}
{"step": 73400, "loss": 0.23724307119846344, "loss_nce": 0.23699542880058289, "loss_mse": 0.002476460300385952, "lr": 0.00026584443846404393, "grad_norm": 0.12547343969345093, "wall_ms": 1020024}
{"step": 73500, "loss": 0.24963702261447906, "loss_nce": 0.24938960373401642, "loss_mse": 0.0024742549285292625, "lr": 0.0002653283504218538, "grad_norm": 0.1337297260761261, "wall_ms": 1021262}
{"step": 73600, "loss": 0.23587137460708618, "loss_nce": 0.23562365770339966, "loss_mse": 0.0024772067554295063, "lr": 0.0002648122164643018, "grad_norm": 0.1301194727420807, "wall_ms": 1022507}
{"step": 73700, "loss": 0.24361366033554077, "loss_nce": 0.24336174130439758, "loss_mse": 0.0025192496832460165, "lr": 0.0002642960388858954, "grad_norm": 0.12671925127506256, "wall_ms": 1023747}
{"step": 73800, "loss": 0.2642166018486023, "loss_nce": 0.2639586925506592, "loss_mse": 0.002579119987785816, "lr": 0.00026377981998133593, "grad_norm": 0.1308223158121109, "wall_ms": 1024988}
{"step": 73900, "loss": 0.2877354025840759, "loss_nce": 0.2874876856803894, "loss_mse": 0.0024772139731794596, "lr": 0.0002632635620455085, "grad_norm": 0.13564777374267578, "wall_ms": 1026238}
{"step": 74000, "loss": 0.22706471383571625, "loss_nce": 0.22681868076324463, "loss_mse": 0.002460331888869405, "lr": 0.0002627472673734719, "grad_norm": 0.11795688420534134, "wall_ms": 1027507}
{"step": 74100, "loss": 0.23567183315753937, "loss_nce": 0.23541918396949768, "loss_mse": 0.0025265165604650974, "lr": 0.0002622309382604481, "grad_norm": 0.11921150237321854, "wall_ms": 1028778}
{"step": 74200, "loss": 0.2273639291524887, "loss_nce": 0.2271152138710022, "loss_mse": 0.0024870876222848892, "lr": 0.00026171457700181197, "grad_norm": 0.10871629416942596, "wall_ms": 1030046}
{"step": 74300, "loss": 0.2405451536178589, "loss_nce": 0.2402959167957306, "loss_mse": 0.0024923935998231173, "lr": 0.00026119818589308177, "grad_norm": 0.13068076968193054, "wall_ms": 1031317}
{"step": 74400, "loss": 0.24845778942108154, "loss_nce": 0.24820543825626373, "loss_mse": 0.0025234855711460114, "lr": 0.0002606817672299079, "grad_norm": 0.12512537837028503, "wall_ms": 1032587}
{"step": 74500, "loss": 0.29550179839134216, "loss_nce": 0.2952490746974945, "loss_mse": 0.002527289791032672, "lr": 0.0002601653233080637, "grad_norm": 0.13540159165859222, "wall_ms": 1033858}
{"step": 74600, "loss": 0.207423135638237, "loss_nce": 0.20717838406562805, "loss_mse": 0.002447571838274598, "lr": 0.0002596488564234347, "grad_norm": 0.11594925075769424, "wall_ms": 1035126}
{"step": 74700, "loss": 0.21877844631671906, "loss_nce": 0.21853119134902954, "loss_mse": 0.002472520340234041, "lr": 0.0002591323688720082, "grad_norm": 0.11543426662683487, "wall_ms": 1036398}
{"step": 74800, "loss": 0.24331049621105194, "loss_nce": 0.24305720627307892, "loss_mse": 0.002532828599214554, "lr": 0.0002586158629498638, "grad_norm": 0.1234588623046875, "wall_ms": 1037666}
{"step": 74900, "loss": 0.25967347621917725, "loss_nce": 0.2594184875488281, "loss_mse": 0.0025498613249510527, "lr": 0.0002580993409531626, "grad_norm": 0.12409351766109467, "wall_ms": 1038936}
{"step": 75000, "loss": 0.24101565778255463, "loss_nce": 0.2407643347978592, "loss_mse": 0.0025132650043815374, "lr": 0.000257582805178137, "grad_norm": 0.12586309015750885, "wall_ms": 1040209}
{"step": 75100, "loss": 0.20517882704734802, "loss_nce": 0.20493453741073608, "loss_mse": 0.0024429101031273603, "lr": 0.0002570662579210809, "grad_norm": 0.11115272343158722, "wall_ms": 1041477}
{"step": 75200, "loss": 0.23566554486751556, "loss_nce": 0.23541706800460815, "loss_mse": 0.0024847262538969517, "lr": 0.0002565497014783393, "grad_norm": 0.12088780105113983, "wall_ms": 1042746}
{"step": 75300, "loss": 0.22038519382476807, "loss_nce": 0.22013753652572632, "loss_mse": 0.0024766067508608103, "lr": 0.0002560331381462976, "grad_norm": 0.12210887670516968, "wall_ms": 1044010}
{"step": 75400, "loss": 0.23172861337661743, "loss_nce": 0.2314806580543518, "loss_mse": 0.0024795986246317625, "lr": 0.00025551657022137225, "grad_norm": 0.13070276379585266, "wall_ms": 1045277}
{"step": 75500, "loss": 0.251438707113266, "loss_nce": 0.2511886954307556, "loss_mse": 0.0025002132169902325, "lr": 0.000255, "grad_norm": 0.12113604694604874, "wall_ms": 1046559}
{"step": 75600, "loss": 0.25813940167427063, "loss_nce": 0.25789061188697815, "loss_mse": 0.0024877660907804966, "lr": 0.00025448342977862773, "grad_norm": 0.14131014049053192, "wall_ms": 1047846}
{"step": 75700, "loss": 0.2617212235927582, "loss_nce": 0.26146697998046875, "loss_mse": 0.002542491303756833, "lr": 0.00025396686185370245, "grad_norm": 0.1290719211101532, "wall_ms": 1049110}
{"step": 75800, "loss": 0.22084788978099823, "loss_nce": 0.22060799598693848, "loss_mse": 0.002398952841758728, "lr": 0.0002534502985216608, "grad_norm": 0.1264299750328064, "wall_ms": 1050379}
{"step": 75900, "loss": 0.23843106627464294, "loss_nce": 0.23818069696426392, "loss_mse": 0.002503755735233426, "lr": 0.0002529337420789191, "grad_norm": 0.12211528420448303, "wall_ms": 1051644}
{"step": 76000, "loss": 0.2113170474767685, "loss_nce": 0.21107637882232666, "loss_mse": 0.0024067505728453398, "lr": 0.00025241719482186297, "grad_norm": 0.12438313663005829, "wall_ms": 1052920}
{"step": 76100, "loss": 0.2352321594953537, "loss_nce": 0.2349851429462433, "loss_mse": 0.0024700919166207314, "lr": 0.00025190065904683745, "grad_norm": 0.1284315139055252, "wall_ms": 1054222}
{"step": 76200, "loss": 0.24545755982398987, "loss_nce": 0.2452060878276825, "loss_mse": 0.0025146519765257835, "lr": 0.0002513841370501362, "grad_norm": 0.14057470858097076, "wall_ms": 1055521}
{"step": 76300, "loss": 0.24255196750164032, "loss_nce": 0.24230198562145233, "loss_mse": 0.002499812049791217, "lr": 0.0002508676311279918, "grad_norm": 0.1266053318977356, "wall_ms": 1056800}
{"step": 76400, "loss": 0.22010280191898346, "loss_nce": 0.2198495864868164, "loss_mse": 0.002532221842557192, "lr": 0.0002503511435765654, "grad_norm": 0.1240692213177681, "wall_ms": 1058054}
{"step": 76500, "loss": 0.27218565344810486, "loss_nce": 0.2719380259513855, "loss_mse": 0.0024761310778558254, "lr": 0.00024983467669193637, "grad_norm": 0.13164807856082916, "wall_ms": 1059307}
{"step": 76600, "loss": 0.2367810606956482, "loss_nce": 0.23653116822242737, "loss_mse": 0.0024988672230392694, "lr": 0.00024931823277009216, "grad_norm": 0.12110433727502823, "wall_ms": 1060558}
{"step": 76700, "loss": 0.23223502933979034, "loss_nce": 0.2319846898317337, "loss_mse": 0.0025033692363649607, "lr": 0.0002488018141069183, "grad_norm": 0.12791089713573456, "wall_ms": 1061818}
{"step": 76800, "loss": 0.25375351309776306, "loss_nce": 0.25350379943847656, "loss_mse": 0.0024970476515591145, "lr": 0.000248285422998188, "grad_norm": 0.12995168566703796, "wall_ms": 1063061}
{"step": 76900, "loss": 0.23949269950389862, "loss_nce": 0.239241361618042, "loss_mse": 0.0025133828166872263, "lr": 0.00024776906173955195, "grad_norm": 0.11880593001842499, "wall_ms": 1064313}
{"step": 77000, "loss": 0.2678181231021881, "loss_nce": 0.2675701379776001, "loss_mse": 0.002479802118614316, "lr": 0.00024725273262652807, "grad_norm": 0.1240948736667633, "wall_ms": 1065557}
{"step": 77100, "loss": 0.2546127736568451, "loss_nce": 0.2543655037879944, "loss_mse": 0.0024726339615881443, "lr": 0.00024673643795449147, "grad_norm": 0.1323385238647461, "wall_ms": 1066804}
{"step": 77200, "loss": 0.20387686789035797, "loss_nce": 0.20362958312034607, "loss_mse": 0.0024729131255298853, "lr": 0.00024622018001866416, "grad_norm": 0.11452298611402512, "wall_ms": 1068073}
{"step": 77300, "loss": 0.2442770153284073, "loss_nce": 0.2440297156572342, "loss_mse": 0.0024729471188038588, "lr": 0.0002457039611141047, "grad_norm": 0.12440124899148941, "wall_ms": 1069337}
{"step": 77400, "loss": 0.25781673192977905, "loss_nce": 0.2575686573982239, "loss_mse": 0.0024807092268019915, "lr": 0.00024518778353569825, "grad_norm": 0.1270899772644043, "wall_ms": 1070584}
{"step": 77500, "loss": 0.22798436880111694, "loss_nce": 0.2277325689792633, "loss_mse": 0.0025179407093673944, "lr": 0.0002446716495781462, "grad_norm": 0.12715594470500946, "wall_ms": 1071840}
{"step": 77600, "loss": 0.20546212792396545, "loss_nce": 0.20521318912506104, "loss_mse": 0.002489406382665038, "lr": 0.0002441555615359561, "grad_norm": 0.116768978536129, "wall_ms": 1073103}
{"step": 77700, "loss": 0.23513035476207733, "loss_nce": 0.23487958312034607, "loss_mse": 0.0025077855680137873, "lr": 0.00024363952170343135, "grad_norm": 0.12281248718500137, "wall_ms": 1074358}
{"step": 77800, "loss": 0.2485959827899933, "loss_nce": 0.24834881722927094, "loss_mse": 0.0024717056658118963, "lr": 0.0002431235323746608, "grad_norm": 0.13086660206317902, "wall_ms": 1075619}
{"step": 77900, "loss": 0.21645383536815643, "loss_nce": 0.21620863676071167, "loss_mse": 0.0024519958533346653, "lr": 0.00024260759584350931, "grad_norm": 0.12202553451061249, "wall_ms": 1076873}
{"step": 78000, "loss": 0.23422186076641083, "loss_nce": 0.23397032916545868, "loss_mse": 0.002515326486900449, "lr": 0.00024209171440360653, "grad_norm": 0.1257549673318863, "wall_ms": 1078125}
{"step": 78100, "loss": 0.251556932926178, "loss_nce": 0.2513020634651184, "loss_mse": 0.0025485893711447716, "lr": 0.00024157589034833735, "grad_norm": 0.12187013775110245, "wall_ms": 1079383}
{"step": 78200, "loss": 0.23943252861499786, "loss_nce": 0.2391800433397293, "loss_mse": 0.0025248373858630657, "lr": 0.00024106012597083164, "grad_norm": 0.12738801538944244, "wall_ms": 1080633}
{"step": 78300, "loss": 0.24416600167751312, "loss_nce": 0.24391499161720276, "loss_mse": 0.002510110381990671, "lr": 0.00024054442356395386, "grad_norm": 0.11588253825902939, "wall_ms": 1081889}
{"step": 78400, "loss": 0.24903565645217896, "loss_nce": 0.24878770112991333, "loss_mse": 0.002479566726833582, "lr": 0.00024002878542029305, "grad_norm": 0.13019993901252747, "wall_ms": 1083131}
{"step": 78500, "loss": 0.23837366700172424, "loss_nce": 0.23812545835971832, "loss_mse": 0.002482064999639988, "lr": 0.00023951321383215246, "grad_norm": 0.12156662344932556, "wall_ms": 1084375}
{"step": 78600, "loss": 0.22416386008262634, "loss_nce": 0.22391420602798462, "loss_mse": 0.002496493747457862, "lr": 0.00023899771109153964, "grad_norm": 0.11702027916908264, "wall_ms": 1085620}
{"step": 78700, "loss": 0.22976094484329224, "loss_nce": 0.2295146882534027, "loss_mse": 0.0024625405203551054, "lr": 0.00023848227949015594, "grad_norm": 0.12143515050411224, "wall_ms": 1086862}
{"step": 78800, "loss": 0.24603867530822754, "loss_nce": 0.24578368663787842, "loss_mse": 0.0025499039329588413, "lr": 0.00023796692131938633, "grad_norm": 0.1458938717842102, "wall_ms": 1088106}
{"step": 78900, "loss": 0.25212985277175903, "loss_nce": 0.25188344717025757, "loss_mse": 0.0024640806950628757, "lr": 0.00023745163887028975, "grad_norm": 0.13643696904182434, "wall_ms": 1089347}
{"step": 79000, "loss": 0.2434529811143875, "loss_nce": 0.24319905042648315, "loss_mse": 0.0025393152609467506, "lr": 0.00023693643443358815, "grad_norm": 0.1256663203239441, "wall_ms": 1090587}
{"step": 79100, "loss": 0.27241379022598267, "loss_nce": 0.2721610367298126, "loss_mse": 0.0025274655781686306, "lr": 0.00023642131029965677, "grad_norm": 0.13774168491363525, "wall_ms": 1091827}
{"step": 79200, "loss": 0.2367800623178482, "loss_nce": 0.2365286648273468, "loss_mse": 0.002513973508030176, "lr": 0.00023590626875851384, "grad_norm": 0.1258641481399536, "wall_ms": 1093065}
{"step": 79300, "loss": 0.27145150303840637, "loss_nce": 0.2712021768093109, "loss_mse": 0.0024931891821324825, "lr": 0.00023539131209981048, "grad_norm": 0.13518568873405457, "wall_ms": 1094306}
{"step": 79400, "loss": 0.22492645680904388, "loss_nce": 0.22467845678329468, "loss_mse": 0.0024799450766295195, "lr": 0.00023487644261282046, "grad_norm": 0.12076575309038162, "wall_ms": 1095546}
{"step": 79500, "loss": 0.22406220436096191, "loss_nce": 0.22381573915481567, "loss_mse": 0.0024647009558975697, "lr": 0.0002343616625864299, "grad_norm": 0.11816015839576721, "wall_ms": 1096785}
{"step": 79600, "loss": 0.24675825238227844, "loss_nce": 0.24650779366493225, "loss_mse": 0.0025045303627848625, "lr": 0.00023384697430912742, "grad_norm": 0.1214112788438797, "wall_ms": 1098034}
{"step": 79700, "loss": 0.2303198277950287, "loss_nce": 0.23007270693778992, "loss_mse": 0.002471195999532938, "lr": 0.00023333238006899344, "grad_norm": 0.13737450540065765, "wall_ms": 1099282}
{"step": 79800, "loss": 0.22275905311107635, "loss_nce": 0.22250673174858093, "loss_mse": 0.002523226896300912, "lr": 0.00023281788215369082, "grad_norm": 0.1233719140291214, "wall_ms": 1100520}
{"step": 79900, "loss": 0.24721533060073853, "loss_nce": 0.24696457386016846, "loss_mse": 0.002507604891434312, "lr": 0.00023230348285045377, "grad_norm": 0.12869226932525635, "wall_ms": 1101763}
{"step": 80000, "loss": 0.25574490427970886, "loss_nce": 0.2554927468299866, "loss_mse": 0.0025216720532625914, "lr": 0.00023178918444607827, "grad_norm": 0.12625811994075775, "wall_ms": 1103006}
{"step": 80100, "loss": 0.23766228556632996, "loss_nce": 0.2374124526977539, "loss_mse": 0.0024983214680105448, "lr": 0.00023127498922691182, "grad_norm": 0.13388094305992126, "wall_ms": 1116900}
{"step": 80200, "loss": 0.24233779311180115, "loss_nce": 0.24209123849868774, "loss_mse": 0.0024655451998114586, "lr": 0.00023076089947884294, "grad_norm": 0.13674739003181458, "wall_ms": 1118139}
{"step": 80300, "loss": 0.23858550190925598, "loss_nce": 0.23833626508712769, "loss_mse": 0.002492376370355487, "lr": 0.00023024691748729168, "grad_norm": 0.13008037209510803, "wall_ms": 1119376}
{"step": 80400, "loss": 0.2265893816947937, "loss_nce": 0.22633779048919678, "loss_mse": 0.0025159462820738554, "lr": 0.00022973304553719874, "grad_norm": 0.11564674973487854, "wall_ms": 1120619}
{"step": 80500, "loss": 0.2662256360054016, "loss_nce": 0.2659749984741211, "loss_mse": 0.0025064984802156687, "lr": 0.0002292192859130157, "grad_norm": 0.13524094223976135, "wall_ms": 1121861}
{"step": 80600, "loss": 0.23081542551517487, "loss_nce": 0.23056524991989136, "loss_mse": 0.0025017596781253815, "lr": 0.00022870564089869466, "grad_norm": 0.12884603440761566, "wall_ms": 1123105}
{"step": 80700, "loss": 0.2719036042690277, "loss_nce": 0.2716566026210785, "loss_mse": 0.0024701219517737627, "lr": 0.0002281921127776787, "grad_norm": 0.12423869222402573, "wall_ms": 1124346}
{"step": 80800, "loss": 0.2514532506465912, "loss_nce": 0.25120264291763306, "loss_mse": 0.002506182761862874, "lr": 0.0002276787038328906, "grad_norm": 0.1262264996767044, "wall_ms": 1125587}
{"step": 80900, "loss": 0.22420768439769745, "loss_nce": 0.22395378351211548, "loss_mse": 0.0025389501824975014, "lr": 0.0002271654163467238, "grad_norm": 0.12047075480222702, "wall_ms": 1126829}
{"step": 81000, "loss": 0.25582000613212585, "loss_nce": 0.25557202100753784, "loss_mse": 0.0024798454251140356, "lr": 0.00022665225260103152, "grad_norm": 0.13950864970684052, "wall_ms": 1128068}
{"step": 81100, "loss": 0.20673586428165436, "loss_nce": 0.2064845710992813, "loss_mse": 0.0025129930581897497, "lr": 0.00022613921487711698, "grad_norm": 0.11463062465190887, "wall_ms": 1129308}
{"step": 81200, "loss": 0.22644519805908203, "loss_nce": 0.2261936366558075, "loss_mse": 0.0025155667681246996, "lr": 0.00022562630545572325, "grad_norm": 0.11400771886110306, "wall_ms": 1130547}
{"step": 81300, "loss": 0.24389341473579407, "loss_nce": 0.24364586174488068, "loss_mse": 0.002475474029779434, "lr": 0.00022511352661702287, "grad_norm": 0.1357879936695099, "wall_ms": 1131788}
{"step": 81400, "loss": 0.22810080647468567, "loss_nce": 0.22785110771656036, "loss_mse": 0.0024970334488898516, "lr": 0.000224600880640608, "grad_norm": 0.12401828169822693, "wall_ms": 1133028}
{"step": 81500, "loss": 0.2505891025066376, "loss_nce": 0.25033366680145264, "loss_mse": 0.002554357284680009, "lr": 0.00022408836980547989, "grad_norm": 0.14551042020320892, "wall_ms": 1134300}
{"step": 81600, "loss": 0.2652621865272522, "loss_nce": 0.2650119960308075, "loss_mse": 0.0025018788874149323, "lr": 0.00022357599639003949, "grad_norm": 0.1373181790113449, "wall_ms": 1135571}
{"step": 81700, "loss": 0.2504967451095581, "loss_nce": 0.2502439022064209, "loss_mse": 0.002528436714783311, "lr": 0.00022306376267207636, "grad_norm": 0.13654884696006775, "wall_ms": 1136838}
{"step": 81800, "loss": 0.2566753625869751, "loss_nce": 0.25642162561416626, "loss_mse": 0.0025374842807650566, "lr": 0.00022255167092875936, "grad_norm": 0.13612604141235352, "wall_ms": 1138106}
{"step": 81900, "loss": 0.22582773864269257, "loss_nce": 0.22557929158210754, "loss_mse": 0.002484455006197095, "lr": 0.00022203972343662592, "grad_norm": 0.1216375082731247, "wall_ms": 1139372}
{"step": 82000, "loss": 0.24922025203704834, "loss_nce": 0.2489739954471588, "loss_mse": 0.00246256566606462, "lr": 0.00022152792247157247, "grad_norm": 0.1318935602903366, "wall_ms": 1140642}
{"step": 82100, "loss": 0.2851904332637787, "loss_nce": 0.28493741154670715, "loss_mse": 0.002530117752030492, "lr": 0.00022101627030884382, "grad_norm": 0.14982633292675018, "wall_ms": 1141900}
{"step": 82200, "loss": 0.2210434228181839, "loss_nce": 0.2207963466644287, "loss_mse": 0.0024707652628421783, "lr": 0.00022050476922302328, "grad_norm": 0.11976338177919388, "wall_ms": 1143142}
{"step": 82300, "loss": 0.24392078816890717, "loss_nce": 0.24367016553878784, "loss_mse": 0.0025062202475965023, "lr": 0.0002199934214880228, "grad_norm": 0.13275772333145142, "wall_ms": 1144406}
{"step": 82400, "loss": 0.24493232369422913, "loss_nce": 0.24468591809272766, "loss_mse": 0.002464106772094965, "lr": 0.0002194822293770721, "grad_norm": 0.13008730113506317, "wall_ms": 1145670}
{"step": 82500, "loss": 0.2286553680896759, "loss_nce": 0.22840431332588196, "loss_mse": 0.0025105527602136135, "lr": 0.0002189711951627097, "grad_norm": 0.12155712395906448, "wall_ms": 1146935}
{"step": 82600, "loss": 0.26843130588531494, "loss_nce": 0.2681822180747986, "loss_mse": 0.002490778686478734, "lr": 0.00021846032111677176, "grad_norm": 0.1381494253873825, "wall_ms": 1148186}
{"step": 82700, "loss": 0.2345697283744812, "loss_nce": 0.2343226969242096, "loss_mse": 0.0024703480303287506, "lr": 0.0002179496095103824, "grad_norm": 0.12719085812568665, "wall_ms": 1149450}
{"step": 82800, "loss": 0.23222452402114868, "loss_nce": 0.23197755217552185, "loss_mse": 0.002469709375873208, "lr": 0.00021743906261394372, "grad_norm": 0.12238233536481857, "wall_ms": 1150709}
{"step": 82900, "loss": 0.2587110996246338, "loss_nce": 0.2584572434425354, "loss_mse": 0.002538513857871294, "lr": 0.00021692868269712569, "grad_norm": 0.12855665385723114, "wall_ms": 1151980}
{"step": 83000, "loss": 0.2751062214374542, "loss_nce": 0.27484914660453796, "loss_mse": 0.002570764860138297, "lr": 0.00021641847202885566, "grad_norm": 0.1382979154586792, "wall_ms": 1153246}
{"step": 83100, "loss": 0.26169878244400024, "loss_nce": 0.2614436149597168, "loss_mse": 0.0025515572633594275, "lr": 0.0002159084328773089, "grad_norm": 0.13283678889274597, "wall_ms": 1154500}
{"step": 83200, "loss": 0.2655157446861267, "loss_nce": 0.26526305079460144, "loss_mse": 0.0025269065517932177, "lr": 0.00021539856750989796, "grad_norm": 0.1361279934644699, "wall_ms": 1155760}
{"step": 83300, "loss": 0.2601686418056488, "loss_nce": 0.25991860032081604, "loss_mse": 0.002500345231965184, "lr": 0.00021488887819326295, "grad_norm": 0.12393368035554886, "wall_ms": 1157044}
{"step": 83400, "loss": 0.2784916162490845, "loss_nce": 0.278243750333786, "loss_mse": 0.002478645881637931, "lr": 0.00021437936719326115, "grad_norm": 0.13167814910411835, "wall_ms": 1158334}
{"step": 83500, "loss": 0.22675307095050812, "loss_nce": 0.226507306098938, "loss_mse": 0.0024575793650001287, "lr": 0.0002138700367749575, "grad_norm": 0.12519150972366333, "wall_ms": 1159603}
{"step": 83600, "loss": 0.24778413772583008, "loss_nce": 0.2475380003452301, "loss_mse": 0.002461396623402834, "lr": 0.0002133608892026138, "grad_norm": 0.12036018073558807, "wall_ms": 1160866}
{"step": 83700, "loss": 0.2341819405555725, "loss_nce": 0.2339363992214203, "loss_mse": 0.002455413108691573, "lr": 0.00021285192673967907, "grad_norm": 0.13518975675106049, "wall_ms": 1162129}
{"step": 83800, "loss": 0.25115475058555603, "loss_nce": 0.250910222530365, "loss_mse": 0.0024452675133943558, "lr": 0.00021234315164877948, "grad_norm": 0.12895585596561432, "wall_ms": 1163386}
{"step": 83900, "loss": 0.2469528764486313, "loss_nce": 0.24670162796974182, "loss_mse": 0.0025125262327492237, "lr": 0.00021183456619170826, "grad_norm": 0.12328632175922394, "wall_ms": 1164633}
{"step": 84000, "loss": 0.273190438747406, "loss_nce": 0.27294325828552246, "loss_mse": 0.0024717373307794333, "lr": 0.00021132617262941547, "grad_norm": 0.13489368557929993, "wall_ms": 1165880}
{"step": 84100, "loss": 0.2532620429992676, "loss_nce": 0.25301215052604675, "loss_mse": 0.002498933579772711, "lr": 0.00021081797322199824, "grad_norm": 0.12651340663433075, "wall_ms": 1167131}
{"step": 84200, "loss": 0.22108210623264313, "loss_nce": 0.22083528339862823, "loss_mse": 0.0024682534858584404, "lr": 0.00021030997022869043, "grad_norm": 0.12547919154167175, "wall_ms": 1168403}
{"step": 84300, "loss": 0.2531519830226898, "loss_nce": 0.25290611386299133, "loss_mse": 0.002458664821460843, "lr": 0.00020980216590785273, "grad_norm": 0.13428625464439392, "wall_ms": 1169677}
{"step": 84400, "loss": 0.2341419756412506, "loss_nce": 0.23389551043510437, "loss_mse": 0.0024646823294460773, "lr": 0.00020929456251696295, "grad_norm": 0.1320737600326538, "wall_ms": 1170967}
{"step": 84500, "loss": 0.25519242882728577, "loss_nce": 0.25494152307510376, "loss_mse": 0.0025089550763368607, "lr": 0.0002087871623126051, "grad_norm": 0.12658676505088806, "wall_ms": 1172255}
{"step": 84600, "loss": 0.22326228022575378, "loss_nce": 0.2230120450258255, "loss_mse": 0.0025022930931299925, "lr": 0.00020827996755046042, "grad_norm": 0.12344833463430405, "wall_ms": 1173554}
{"step": 84700, "loss": 0.23619000613689423, "loss_nce": 0.23593956232070923, "loss_mse": 0.0025044563226401806, "lr": 0.00020777298048529653, "grad_norm": 0.13359791040420532, "wall_ms": 1174844}
{"step": 84800, "loss": 0.22911441326141357, "loss_nce": 0.22886252403259277, "loss_mse": 0.0025188422296196222, "lr": 0.0002072662033709578, "grad_norm": 0.12253966182470322, "wall_ms": 1176153}
{"step": 84900, "loss": 0.24711351096630096, "loss_nce": 0.24686484038829803, "loss_mse": 0.0024867746978998184, "lr": 0.0002067596384603553, "grad_norm": 0.12901175022125244, "wall_ms": 1177454}
{"step": 85000, "loss": 0.2323715090751648, "loss_nce": 0.23212581872940063, "loss_mse": 0.002456917194649577, "lr": 0.00020625328800545663, "grad_norm": 0.11384231597185135, "wall_ms": 1178756}
{"step": 85100, "loss": 0.24401797354221344, "loss_nce": 0.24376697838306427, "loss_mse": 0.0025099588092416525, "lr": 0.00020574715425727614, "grad_norm": 0.1326419860124588, "wall_ms": 1180056}
{"step": 85200, "loss": 0.24947801232337952, "loss_nce": 0.24923139810562134, "loss_mse": 0.0024661801289767027, "lr": 0.0002052412394658647, "grad_norm": 0.12868192791938782, "wall_ms": 1181367}
{"step": 85300, "loss": 0.26806876063346863, "loss_nce": 0.2678179144859314, "loss_mse": 0.0025083175860345364, "lr": 0.00020473554588029992, "grad_norm": 0.14016762375831604, "wall_ms": 1182680}
{"step": 85400, "loss": 0.24987399578094482, "loss_nce": 0.24962304532527924, "loss_mse": 0.002509576501324773, "lr": 0.00020423007574867577, "grad_norm": 0.12394144386053085, "wall_ms": 1183991}
{"step": 85500, "loss": 0.2557125389575958, "loss_nce": 0.2554613947868347, "loss_mse": 0.002511579543352127, "lr": 0.00020372483131809318, "grad_norm": 0.13421444594860077, "wall_ms": 1185285}
{"step": 85600, "loss": 0.2397826611995697, "loss_nce": 0.2395303100347519, "loss_mse": 0.0025235202629119158, "lr": 0.0002032198148346495, "grad_norm": 0.12643396854400635, "wall_ms": 1186591}
{"step": 85700, "loss": 0.2446412593126297, "loss_nce": 0.24439644813537598, "loss_mse": 0.002448105486109853, "lr": 0.00020271502854342867, "grad_norm": 0.13165292143821716, "wall_ms": 1187902}
{"step": 85800, "loss": 0.24142393469810486, "loss_nce": 0.24117228388786316, "loss_mse": 0.0025165248662233353, "lr": 0.00020221047468849148, "grad_norm": 0.11403150856494904, "wall_ms": 1189208}
{"step": 85900, "loss": 0.22960162162780762, "loss_nce": 0.22935537993907928, "loss_mse": 0.002462481614202261, "lr": 0.0002017061555128653, "grad_norm": 0.1265111118555069, "wall_ms": 1190498}
{"step": 86000, "loss": 0.24976782500743866, "loss_nce": 0.24951814115047455, "loss_mse": 0.0024968311190605164, "lr": 0.00020120207325853413, "grad_norm": 0.13225626945495605, "wall_ms": 1191771}
{"step": 86100, "loss": 0.20812441408634186, "loss_nce": 0.20787443220615387, "loss_mse": 0.0024997671134769917, "lr": 0.0002006982301664289, "grad_norm": 0.1207258552312851, "wall_ms": 1193039}
{"step": 86200, "loss": 0.2652721107006073, "loss_nce": 0.2650177478790283, "loss_mse": 0.0025437644217163324, "lr": 0.0002001946284764173, "grad_norm": 0.1365315318107605, "wall_ms": 1194305}
{"step": 86300, "loss": 0.25410738587379456, "loss_nce": 0.2538568377494812, "loss_mse": 0.0025055930018424988, "lr": 0.00019969127042729378, "grad_norm": 0.12790372967720032, "wall_ms": 1195561}
{"step": 86400, "loss": 0.2352416217327118, "loss_nce": 0.2349979728460312, "loss_mse": 0.002436553593724966, "lr": 0.00019918815825676969, "grad_norm": 0.12288076430559158, "wall_ms": 1196817}
{"step": 86500, "loss": 0.22645916044712067, "loss_nce": 0.22621110081672668, "loss_mse": 0.002480625407770276, "lr": 0.00019868529420146336, "grad_norm": 0.12082066386938095, "wall_ms": 1198090}
{"step": 86600, "loss": 0.26360762119293213, "loss_nce": 0.2633529305458069, "loss_mse": 0.002546832663938403, "lr": 0.00019818268049688994, "grad_norm": 0.1280178427696228, "wall_ms": 1199364}
{"step": 86700, "loss": 0.21389812231063843, "loss_nce": 0.21365554630756378, "loss_mse": 0.002425787504762411, "lr": 0.0001976803193774519, "grad_norm": 0.11937915533781052, "wall_ms": 1200636}
{"step": 86800, "loss": 0.24110771715641022, "loss_nce": 0.24085699021816254, "loss_mse": 0.0025073166470974684, "lr": 0.00019717821307642855, "grad_norm": 0.13221712410449982, "wall_ms": 1201907}
{"step": 86900, "loss": 0.2587890028953552, "loss_nce": 0.2585384249687195, "loss_mse": 0.0025057208258658648, "lr": 0.00019667636382596652, "grad_norm": 0.14501434564590454, "wall_ms": 1203178}
{"step": 87000, "loss": 0.2629021108150482, "loss_nce": 0.26264941692352295, "loss_mse": 0.002526917029172182, "lr": 0.00019617477385706976, "grad_norm": 0.12470678985118866, "wall_ms": 1204449}
{"step": 87100, "loss": 0.2492952048778534, "loss_nce": 0.24904221296310425, "loss_mse": 0.0025299012195318937, "lr": 0.00019567344539958944, "grad_norm": 0.13544483482837677, "wall_ms": 1205725}
{"step": 87200, "loss": 0.23649661242961884, "loss_nce": 0.2362423837184906, "loss_mse": 0.00254224706441164, "lr": 0.00019517238068221437, "grad_norm": 0.12334533035755157, "wall_ms": 1206993}
{"step": 87300, "loss": 0.24799787998199463, "loss_nce": 0.2477450668811798, "loss_mse": 0.0025281147100031376, "lr": 0.00019467158193246065, "grad_norm": 0.13827131688594818, "wall_ms": 1208265}
{"step": 87400, "loss": 0.26395249366760254, "loss_nce": 0.2637055814266205, "loss_mse": 0.0024690008722245693, "lr": 0.00019417105137666213, "grad_norm": 0.14403869211673737, "wall_ms": 1209527}
{"step": 87500, "loss": 0.31985458731651306, "loss_nce": 0.31959885358810425, "loss_mse": 0.002557257190346718, "lr": 0.00019367079123996033, "grad_norm": 0.154152512550354, "wall_ms": 1210791}
{"step": 87600, "loss": 0.25239047408103943, "loss_nce": 0.25214487314224243, "loss_mse": 0.0024559658486396074, "lr": 0.0001931708037462946, "grad_norm": 0.12872591614723206, "wall_ms": 1212040}
{"step": 87700, "loss": 0.2567395269870758, "loss_nce": 0.25648483633995056, "loss_mse": 0.0025470193941146135, "lr": 0.0001926710911183923, "grad_norm": 0.1473730504512787, "wall_ms": 1213309}
{"step": 87800, "loss": 0.24651211500167847, "loss_nce": 0.2462623566389084, "loss_mse": 0.0024975240230560303, "lr": 0.00019217165557775881, "grad_norm": 0.12774640321731567, "wall_ms": 1214582}
{"step": 87900, "loss": 0.27333587408065796, "loss_nce": 0.2730846405029297, "loss_mse": 0.0025123723316937685, "lr": 0.0001916724993446678, "grad_norm": 0.1264362931251526, "wall_ms": 1215854}
{"step": 88000, "loss": 0.2569163143634796, "loss_nce": 0.25666430592536926, "loss_mse": 0.0025201302487403154, "lr": 0.00019117362463815093, "grad_norm": 0.11998406797647476, "wall_ms": 1217099}
{"step": 88100, "loss": 0.2584187984466553, "loss_nce": 0.25816577672958374, "loss_mse": 0.002530090743675828, "lr": 0.00019067503367598888, "grad_norm": 0.13325203955173492, "wall_ms": 1218342}
{"step": 88200, "loss": 0.2625066041946411, "loss_nce": 0.26225876808166504, "loss_mse": 0.002478315494954586, "lr": 0.00019017672867470046, "grad_norm": 0.12065303325653076, "wall_ms": 1219581}
{"step": 88300, "loss": 0.2715751826763153, "loss_nce": 0.2713276445865631, "loss_mse": 0.0024752323515713215, "lr": 0.00018967871184953335, "grad_norm": 0.13687893748283386, "wall_ms": 1220823}
{"step": 88400, "loss": 0.2570055425167084, "loss_nce": 0.2567521929740906, "loss_mse": 0.002533626975491643, "lr": 0.00018918098541445417, "grad_norm": 0.13315877318382263, "wall_ms": 1222066}
{"step": 88500, "loss": 0.23693589866161346, "loss_nce": 0.23669138550758362, "loss_mse": 0.002445068908855319, "lr": 0.00018868355158213848, "grad_norm": 0.12456870079040527, "wall_ms": 1223306}
{"step": 88600, "loss": 0.2874966263771057, "loss_nce": 0.2872379422187805, "loss_mse": 0.0025867840740829706, "lr": 0.0001881864125639612, "grad_norm": 0.13515543937683105, "wall_ms": 1224554}
{"step": 88700, "loss": 0.2229393571615219, "loss_nce": 0.2226886749267578, "loss_mse": 0.0025068791583180428, "lr": 0.00018768957056998668, "grad_norm": 0.11473929136991501, "wall_ms": 1225798}
{"step": 88800, "loss": 0.24142080545425415, "loss_nce": 0.2411716729402542, "loss_mse": 0.002491252962499857, "lr": 0.00018719302780895855, "grad_norm": 0.12224312126636505, "wall_ms": 1227055}
{"step": 88900, "loss": 0.24763984978199005, "loss_nce": 0.247391015291214, "loss_mse": 0.0024882820434868336, "lr": 0.00018669678648829032, "grad_norm": 0.12234140187501907, "wall_ms": 1228302}
{"step": 89000, "loss": 0.22979548573493958, "loss_nce": 0.22954963147640228, "loss_mse": 0.0024585057981312275, "lr": 0.0001862008488140557, "grad_norm": 0.12425526976585388, "wall_ms": 1229559}
{"step": 89100, "loss": 0.23136717081069946, "loss_nce": 0.23112061619758606, "loss_mse": 0.0024655635934323072, "lr": 0.0001857052169909781, "grad_norm": 0.1258072853088379, "wall_ms": 1230813}
{"step": 89200, "loss": 0.21540959179401398, "loss_nce": 0.21516013145446777, "loss_mse": 0.0024946313351392746, "lr": 0.0001852098932224215, "grad_norm": 0.10562931001186371, "wall_ms": 1232075}
{"step": 89300, "loss": 0.2379121631383896, "loss_nce": 0.2376616895198822, "loss_mse": 0.002504788339138031, "lr": 0.00018471487971038025, "grad_norm": 0.11606722325086594, "wall_ms": 1233318}
{"step": 89400, "loss": 0.2626013159751892, "loss_nce": 0.26235419511795044, "loss_mse": 0.0024712218437343836, "lr": 0.00018422017865546962, "grad_norm": 0.1285991221666336, "wall_ms": 1234573}
{"step": 89500, "loss": 0.23344838619232178, "loss_nce": 0.23319414258003235, "loss_mse": 0.002542362315580249, "lr": 0.00018372579225691554, "grad_norm": 0.1220928505063057, "wall_ms": 1235812}
{"step": 89600, "loss": 0.26173290610313416, "loss_nce": 0.26147907972335815, "loss_mse": 0.002538120374083519, "lr": 0.00018323172271254555, "grad_norm": 0.1299985647201538, "wall_ms": 1237052}
{"step": 89700, "loss": 0.24141374230384827, "loss_nce": 0.2411702573299408, "loss_mse": 0.0024349158629775047, "lr": 0.00018273797221877818, "grad_norm": 0.12896883487701416, "wall_ms": 1238292}
{"step": 89800, "loss": 0.23889531195163727, "loss_nce": 0.23864245414733887, "loss_mse": 0.0025285969022661448, "lr": 0.00018224454297061367, "grad_norm": 0.1351984441280365, "wall_ms": 1239533}
{"step": 89900, "loss": 0.25995683670043945, "loss_nce": 0.259707510471344, "loss_mse": 0.0024931500665843487, "lr": 0.00018175143716162457, "grad_norm": 0.12944556772708893, "wall_ms": 1240784}
{"step": 90000, "loss": 0.25084343552589417, "loss_nce": 0.2505882680416107, "loss_mse": 0.0025517847388982773, "lr": 0.00018125865698394495, "grad_norm": 0.12869036197662354, "wall_ms": 1242050}
{"step": 90100, "loss": 0.2667419910430908, "loss_nce": 0.2664896249771118, "loss_mse": 0.0025236420333385468, "lr": 0.00018076620462826164, "grad_norm": 0.14706836640834808, "wall_ms": 1256143}
{"step": 90200, "loss": 0.2752123773097992, "loss_nce": 0.2749616503715515, "loss_mse": 0.0025073394645005465, "lr": 0.00018027408228380395, "grad_norm": 0.13747172057628632, "wall_ms": 1257418}
{"step": 90300, "loss": 0.2659275233745575, "loss_nce": 0.2656799256801605, "loss_mse": 0.0024758721701800823, "lr": 0.0001797822921383342, "grad_norm": 0.13011285662651062, "wall_ms": 1258666}
{"step": 90400, "loss": 0.2424124926328659, "loss_nce": 0.24216113984584808, "loss_mse": 0.0025135523173958063, "lr": 0.00017929083637813792, "grad_norm": 0.1298898607492447, "wall_ms": 1259909}
{"step": 90500, "loss": 0.24584713578224182, "loss_nce": 0.24560151994228363, "loss_mse": 0.002456185407936573, "lr": 0.000178799717188014, "grad_norm": 0.12822066247463226, "wall_ms": 1261152}
{"step": 90600, "loss": 0.27588126063346863, "loss_nce": 0.27563023567199707, "loss_mse": 0.0025102016516029835, "lr": 0.00017830893675126517, "grad_norm": 0.1464400440454483, "wall_ms": 1262393}
{"step": 90700, "loss": 0.22270789742469788, "loss_nce": 0.2224641889333725, "loss_mse": 0.002437050687149167, "lr": 0.000177818497249688, "grad_norm": 0.1298769861459732, "wall_ms": 1263632}
{"step": 90800, "loss": 0.2466227412223816, "loss_nce": 0.24637429416179657, "loss_mse": 0.002484411234036088, "lr": 0.0001773284008635639, "grad_norm": 0.12846824526786804, "wall_ms": 1264902}
{"step": 90900, "loss": 0.2294379025697708, "loss_nce": 0.22919347882270813, "loss_mse": 0.0024442861322313547, "lr": 0.00017683864977164845, "grad_norm": 0.11463682353496552, "wall_ms": 1266200}
{"step": 91000, "loss": 0.20997169613838196, "loss_nce": 0.20972149074077606, "loss_mse": 0.0025020374450832605, "lr": 0.00017634924615116243, "grad_norm": 0.11768694967031479, "wall_ms": 1267499}
{"step": 91100, "loss": 0.2519955039024353, "loss_nce": 0.2517491281032562, "loss_mse": 0.0024639000184834003, "lr": 0.0001758601921777818, "grad_norm": 0.13508161902427673, "wall_ms": 1268798}
{"step": 91200, "loss": 0.1991480439901352, "loss_nce": 0.19889962673187256, "loss_mse": 0.0024841465055942535, "lr": 0.00017537149002562827, "grad_norm": 0.10717687755823135, "wall_ms": 1270080}
{"step": 91300, "loss": 0.25423702597618103, "loss_nce": 0.2539841830730438, "loss_mse": 0.00252849911339581, "lr": 0.0001748831418672594, "grad_norm": 0.13077037036418915, "wall_ms": 1271354}
{"step": 91400, "loss": 0.23141761124134064, "loss_nce": 0.23116812109947205, "loss_mse": 0.0024949475191533566, "lr": 0.00017439514987365912, "grad_norm": 0.11551559716463089, "wall_ms": 1272624}
{"step": 91500, "loss": 0.2594107389450073, "loss_nce": 0.25916486978530884, "loss_mse": 0.002458564704284072, "lr": 0.00017390751621422794, "grad_norm": 0.12611036002635956, "wall_ms": 1273892}
{"step": 91600, "loss": 0.25947850942611694, "loss_nce": 0.2592279314994812, "loss_mse": 0.002505912445485592, "lr": 0.00017342024305677333, "grad_norm": 0.1338888555765152, "wall_ms": 1275157}
{"step": 91700, "loss": 0.2748717963695526, "loss_nce": 0.274620920419693, "loss_mse": 0.0025086619425565004, "lr": 0.00017293333256750035, "grad_norm": 0.14130795001983643, "wall_ms": 1276430}
{"step": 91800, "loss": 0.2839985489845276, "loss_nce": 0.28374752402305603, "loss_mse": 0.002510265912860632, "lr": 0.00017244678691100157, "grad_norm": 0.15010273456573486, "wall_ms": 1277700}
{"step": 91900, "loss": 0.2281424105167389, "loss_nce": 0.22789427638053894, "loss_mse": 0.0024813334457576275, "lr": 0.00017196060825024778, "grad_norm": 0.12613874673843384, "wall_ms": 1278971}
{"step": 92000, "loss": 0.2706833481788635, "loss_nce": 0.27042704820632935, "loss_mse": 0.002562878653407097, "lr": 0.0001714747987465782, "grad_norm": 0.14028215408325195, "wall_ms": 1280236}
{"step": 92100, "loss": 0.2342773675918579, "loss_nce": 0.234025776386261, "loss_mse": 0.002515841741114855, "lr": 0.00017098936055969088, "grad_norm": 0.12644881010055542, "wall_ms": 1281490}
{"step": 92200, "loss": 0.27216142416000366, "loss_nce": 0.2719089388847351, "loss_mse": 0.0025248059537261724, "lr": 0.0001705042958476334, "grad_norm": 0.1423255205154419, "wall_ms": 1282732}
{"step": 92300, "loss": 0.24437256157398224, "loss_nce": 0.24412085115909576, "loss_mse": 0.002517038257792592, "lr": 0.00017001960676679288, "grad_norm": 0.13330303132534027, "wall_ms": 1283975}
{"step": 92400, "loss": 0.25371211767196655, "loss_nce": 0.2534641921520233, "loss_mse": 0.0024791588075459003, "lr": 0.0001695352954718864, "grad_norm": 0.13594985008239746, "wall_ms": 1285217}
{"step": 92500, "loss": 0.254379540681839, "loss_nce": 0.2541285753250122, "loss_mse": 0.00250973179936409, "lr": 0.00016905136411595189, "grad_norm": 0.13497617840766907, "wall_ms": 1286465}
{"step": 92600, "loss": 0.2395474761724472, "loss_nce": 0.23930054903030396, "loss_mse": 0.00246926280669868, "lr": 0.00016856781485033779, "grad_norm": 0.12907227873802185, "wall_ms": 1287730}
{"step": 92700, "loss": 0.23686930537223816, "loss_nce": 0.23662154376506805, "loss_mse": 0.0024776754435151815, "lr": 0.00016808464982469445, "grad_norm": 0.12818709015846252, "wall_ms": 1288988}
{"step": 92800, "loss": 0.29981184005737305, "loss_nce": 0.2995589077472687, "loss_mse": 0.002529369667172432, "lr": 0.00016760187118696382, "grad_norm": 0.14262735843658447, "wall_ms": 1290252}
{"step": 92900, "loss": 0.21556493639945984, "loss_nce": 0.2153153121471405, "loss_mse": 0.0024963023606687784, "lr": 0.00016711948108336994, "grad_norm": 0.1303691565990448, "wall_ms": 1291500}
{"step": 93000, "loss": 0.2497091144323349, "loss_nce": 0.24946238100528717, "loss_mse": 0.002467336365953088, "lr": 0.0001666374816584099, "grad_norm": 0.13735799491405487, "wall_ms": 1292755}
{"step": 93100, "loss": 0.2506151497364044, "loss_nce": 0.25036054849624634, "loss_mse": 0.002546130446717143, "lr": 0.00016615587505484396, "grad_norm": 0.12401197850704193, "wall_ms": 1294023}
{"step": 93200, "loss": 0.2774741053581238, "loss_nce": 0.27722668647766113, "loss_mse": 0.0024742467794567347, "lr": 0.00016567466341368582, "grad_norm": 0.14183348417282104, "wall_ms": 1295274}
{"step": 93300, "loss": 0.23292449116706848, "loss_nce": 0.23267832398414612, "loss_mse": 0.0024616308510303497, "lr": 0.00016519384887419362, "grad_norm": 0.1198989748954773, "wall_ms": 1296513}
{"step": 93400, "loss": 0.22136414051055908, "loss_nce": 0.22111472487449646, "loss_mse": 0.002494203392416239, "lr": 0.00016471343357386004, "grad_norm": 0.12135247141122818, "wall_ms": 1297751}
{"step": 93500, "loss": 0.24335643649101257, "loss_nce": 0.24310921132564545, "loss_mse": 0.0024722707457840443, "lr": 0.00016423341964840282, "grad_norm": 0.12960907816886902, "wall_ms": 1299000}
{"step": 93600, "loss": 0.27698206901550293, "loss_nce": 0.2767321467399597, "loss_mse": 0.002499079331755638, "lr": 0.00016375380923175568, "grad_norm": 0.13659018278121948, "wall_ms": 1300245}
{"step": 93700, "loss": 0.3012251853942871, "loss_nce": 0.3009718060493469, "loss_mse": 0.0025336977560073137, "lr": 0.0001632746044560582, "grad_norm": 0.1422685831785202, "wall_ms": 1301490}
{"step": 93800, "loss": 0.24762101471424103, "loss_nce": 0.24737338721752167, "loss_mse": 0.0024762419052422047, "lr": 0.00016279580745164675, "grad_norm": 0.12316868454217911, "wall_ms": 1303087}
{"step": 93900, "loss": 0.22386324405670166, "loss_nce": 0.22361552715301514, "loss_mse": 0.0024771145544946194, "lr": 0.00016231742034704502, "grad_norm": 0.11062169820070267, "wall_ms": 1304329}
{"step": 94000, "loss": 0.21507006883621216, "loss_nce": 0.21482475101947784, "loss_mse": 0.0024531157687306404, "lr": 0.00016183944526895427, "grad_norm": 0.11382230371236801, "wall_ms": 1305574}
{"step": 94100, "loss": 0.2162904441356659, "loss_nce": 0.21603934466838837, "loss_mse": 0.002510944614186883, "lr": 0.00016136188434224432, "grad_norm": 0.12186649441719055, "wall_ms": 1306842}
{"step": 94200, "loss": 0.26101750135421753, "loss_nce": 0.2607659101486206, "loss_mse": 0.0025160457007586956, "lr": 0.0001608847396899436, "grad_norm": 0.13923558592796326, "wall_ms": 1308099}
{"step": 94300, "loss": 0.21959881484508514, "loss_nce": 0.21935710310935974, "loss_mse": 0.0024171106051653624, "lr": 0.00016040801343323015, "grad_norm": 0.11885412037372589, "wall_ms": 1309348}
{"step": 94400, "loss": 0.24378077685832977, "loss_nce": 0.2435295283794403, "loss_mse": 0.0025124438107013702, "lr": 0.00015993170769142186, "grad_norm": 0.1262073963880539, "wall_ms": 1310590}
{"step": 94500, "loss": 0.23910486698150635, "loss_nce": 0.23885230720043182, "loss_mse": 0.002525586634874344, "lr": 0.00015945582458196737, "grad_norm": 0.1147700697183609, "wall_ms": 1311828}
{"step": 94600, "loss": 0.26641109585762024, "loss_nce": 0.26616111397743225, "loss_mse": 0.0024999079760164022, "lr": 0.00015898036622043633, "grad_norm": 0.1344679594039917, "wall_ms": 1313071}
{"step": 94700, "loss": 0.2061833292245865, "loss_nce": 0.20593959093093872, "loss_mse": 0.0024374418426305056, "lr": 0.00015850533472051018, "grad_norm": 0.11646930128335953, "wall_ms": 1314312}
{"step": 94800, "loss": 0.22273385524749756, "loss_nce": 0.22248013317584991, "loss_mse": 0.0025372232776135206, "lr": 0.00015803073219397277, "grad_norm": 0.12728933990001678, "wall_ms": 1315553}
{"step": 94900, "loss": 0.21981410682201385, "loss_nce": 0.2195667326450348, "loss_mse": 0.002473801840096712, "lr": 0.0001575565607507009, "grad_norm": 0.1259678602218628, "wall_ms": 1316797}
{"step": 95000, "loss": 0.22089383006095886, "loss_nce": 0.22064605355262756, "loss_mse": 0.0024777213111519814, "lr": 0.0001570828224986549, "grad_norm": 0.11670995503664017, "wall_ms": 1318043}
{"step": 95100, "loss": 0.2394278347492218, "loss_nce": 0.2391815036535263, "loss_mse": 0.002463252516463399, "lr": 0.00015660951954386937, "grad_norm": 0.12374774366617203, "wall_ms": 1319285}
{"step": 95200, "loss": 0.26411521434783936, "loss_nce": 0.263863205909729, "loss_mse": 0.0025200212839990854, "lr": 0.00015613665399044377, "grad_norm": 0.1294999122619629, "wall_ms": 1320528}
{"step": 95300, "loss": 0.22822540998458862, "loss_nce": 0.22797511518001556, "loss_mse": 0.0025029326789081097, "lr": 0.00015566422794053298, "grad_norm": 0.11983049660921097, "wall_ms": 1321777}
{"step": 95400, "loss": 0.20459069311618805, "loss_nce": 0.20434552431106567, "loss_mse": 0.0024516498669981956, "lr": 0.00015519224349433834, "grad_norm": 0.10861554741859436, "wall_ms": 1323019}
{"step": 95500, "loss": 0.22274596989154816, "loss_nce": 0.2224949151277542, "loss_mse": 0.002510593505576253, "lr": 0.0001547207027500975, "grad_norm": 0.11990932375192642, "wall_ms": 1324266}
{"step": 95600, "loss": 0.25046131014823914, "loss_nce": 0.25020915269851685, "loss_mse": 0.002521507441997528, "lr": 0.0001542496078040761, "grad_norm": 0.13000071048736572, "wall_ms": 1325512}
{"step": 95700, "loss": 0.21723681688308716, "loss_nce": 0.21699321269989014, "loss_mse": 0.0024360509123653173, "lr": 0.00015377896075055773, "grad_norm": 0.11402026563882828, "wall_ms": 1326763}
{"step": 95800, "loss": 0.21771126985549927, "loss_nce": 0.2174569070339203, "loss_mse": 0.002543566981330514, "lr": 0.00015330876368183473, "grad_norm": 0.12172259390354156, "wall_ms": 1328013}
{"step": 95900, "loss": 0.2423863559961319, "loss_nce": 0.24213483929634094, "loss_mse": 0.002515220083296299, "lr": 0.00015283901868819928, "grad_norm": 0.1308065801858902, "wall_ms": 1329275}
{"step": 96000, "loss": 0.25146669149398804, "loss_nce": 0.2512176036834717, "loss_mse": 0.002490753773599863, "lr": 0.00015236972785793356, "grad_norm": 0.1241852343082428, "wall_ms": 1330580}
{"step": 96100, "loss": 0.22205986082553864, "loss_nce": 0.22181154787540436, "loss_mse": 0.002483127173036337, "lr": 0.00015190089327730088, "grad_norm": 0.12777644395828247, "wall_ms": 1331877}
{"step": 96200, "loss": 0.20819823443889618, "loss_nce": 0.2079511284828186, "loss_mse": 0.0024710611905902624, "lr": 0.0001514325170305362, "grad_norm": 0.1023794561624527, "wall_ms": 1333186}
{"step": 96300, "loss": 0.23647354543209076, "loss_nce": 0.23622728884220123, "loss_mse": 0.0024625773075968027, "lr": 0.00015096460119983713, "grad_norm": 0.13396048545837402, "wall_ms": 1334467}
{"step": 96400, "loss": 0.23367032408714294, "loss_nce": 0.2334214746952057, "loss_mse": 0.002488480182364583, "lr": 0.00015049714786535422, "grad_norm": 0.12178581953048706, "wall_ms": 1335745}
{"step": 96500, "loss": 0.22901050746440887, "loss_nce": 0.22875842452049255, "loss_mse": 0.0025207679718732834, "lr": 0.00015003015910518202, "grad_norm": 0.11291544139385223, "wall_ms": 1337029}
{"step": 96600, "loss": 0.2164226621389389, "loss_nce": 0.21617186069488525, "loss_mse": 0.0025079594925045967, "lr": 0.00014956363699534985, "grad_norm": 0.11271078884601593, "wall_ms": 1338308}
{"step": 96700, "loss": 0.2453351616859436, "loss_nce": 0.24508807063102722, "loss_mse": 0.0024708411656320095, "lr": 0.00014909758360981236, "grad_norm": 0.12292271852493286, "wall_ms": 1339608}
{"step": 96800, "loss": 0.2623409926891327, "loss_nce": 0.2620977461338043, "loss_mse": 0.0024323666002601385, "lr": 0.00014863200102044078, "grad_norm": 0.1451224386692047, "wall_ms": 1340904}
{"step": 96900, "loss": 0.23920202255249023, "loss_nce": 0.23895183205604553, "loss_mse": 0.0025019594468176365, "lr": 0.00014816689129701303, "grad_norm": 0.133546382188797, "wall_ms": 1342177}
{"step": 97000, "loss": 0.2541520297527313, "loss_nce": 0.2539029121398926, "loss_mse": 0.0024912217631936073, "lr": 0.00014770225650720503, "grad_norm": 0.1456584483385086, "wall_ms": 1343472}
{"step": 97100, "loss": 0.23003928363323212, "loss_nce": 0.2297964245080948, "loss_mse": 0.0024285935796797276, "lr": 0.00014723809871658135, "grad_norm": 0.12051232159137726, "wall_ms": 1344766}
{"step": 97200, "loss": 0.23056356608867645, "loss_nce": 0.23031361401081085, "loss_mse": 0.002499462803825736, "lr": 0.0001467744199885859, "grad_norm": 0.1270681917667389, "wall_ms": 1346028}
{"step": 97300, "loss": 0.25735676288604736, "loss_nce": 0.25710707902908325, "loss_mse": 0.002496789675205946, "lr": 0.0001463112223845332, "grad_norm": 0.13186079263687134, "wall_ms": 1347277}
{"step": 97400, "loss": 0.23111814260482788, "loss_nce": 0.2308712899684906, "loss_mse": 0.0024685594253242016, "lr": 0.0001458485079635986, "grad_norm": 0.12396325916051865, "wall_ms": 1348525}
{"step": 97500, "loss": 0.21951250731945038, "loss_nce": 0.21927011013031006, "loss_mse": 0.0024240100756287575, "lr": 0.0001453862787828096, "grad_norm": 0.11710774153470993, "wall_ms": 1349768}
{"step": 97600, "loss": 0.23889920115470886, "loss_nce": 0.23864753544330597, "loss_mse": 0.0025167029816657305, "lr": 0.00014492453689703627, "grad_norm": 0.12085913121700287, "wall_ms": 1351012}
{"step": 97700, "loss": 0.24388553202152252, "loss_nce": 0.24363544583320618, "loss_mse": 0.0025008651427924633, "lr": 0.00014446328435898276, "grad_norm": 0.12318313121795654, "wall_ms": 1352255}
{"step": 97800, "loss": 0.2235371470451355, "loss_nce": 0.22328981757164001, "loss_mse": 0.002473368775099516, "lr": 0.00014400252321917757, "grad_norm": 0.11925846338272095, "wall_ms": 1353495}
{"step": 97900, "loss": 0.22808851301670074, "loss_nce": 0.22784096002578735, "loss_mse": 0.00247556297108531, "lr": 0.0001435422555259646, "grad_norm": 0.13126875460147858, "wall_ms": 1354738}
{"step": 98000, "loss": 0.26616546511650085, "loss_nce": 0.2659141719341278, "loss_mse": 0.0025129602290689945, "lr": 0.00014308248332549423, "grad_norm": 0.13036036491394043, "wall_ms": 1355984}
{"step": 98100, "loss": 0.22974735498428345, "loss_nce": 0.2294989824295044, "loss_mse": 0.002483687363564968, "lr": 0.000142623208661714, "grad_norm": 0.11536819487810135, "wall_ms": 1357261}
{"step": 98200, "loss": 0.24312841892242432, "loss_nce": 0.24288122355937958, "loss_mse": 0.0024719235952943563, "lr": 0.0001421644335763598, "grad_norm": 0.12566782534122467, "wall_ms": 1358558}
{"step": 98300, "loss": 0.21503488719463348, "loss_nce": 0.21478238701820374, "loss_mse": 0.002524979179725051, "lr": 0.00014170616010894643, "grad_norm": 0.1158171147108078, "wall_ms": 1359854}
{"step": 98400, "loss": 0.23787271976470947, "loss_nce": 0.23762060701847076, "loss_mse": 0.0025211519096046686, "lr": 0.00014124839029675868, "grad_norm": 0.12594608962535858, "wall_ms": 1361153}
{"step": 98500, "loss": 0.2381526529788971, "loss_nce": 0.23790061473846436, "loss_mse": 0.002520415000617504, "lr": 0.00014079112617484252, "grad_norm": 0.13518980145454407, "wall_ms": 1362455}
{"step": 98600, "loss": 0.21745990216732025, "loss_nce": 0.2172141671180725, "loss_mse": 0.0024573032278567553, "lr": 0.0001403343697759957, "grad_norm": 0.11471447348594666, "wall_ms": 1363697}
{"step": 98700, "loss": 0.23306335508823395, "loss_nce": 0.23281913995742798, "loss_mse": 0.0024421117268502712, "lr": 0.00013987812313075884, "grad_norm": 0.1334286332130432, "wall_ms": 1364936}
{"step": 98800, "loss": 0.2435445338487625, "loss_nce": 0.24329625070095062, "loss_mse": 0.0024828899186104536, "lr": 0.00013942238826740645, "grad_norm": 0.13260836899280548, "wall_ms": 1366185}
{"step": 98900, "loss": 0.23435820639133453, "loss_nce": 0.23411288857460022, "loss_mse": 0.0024531832896173, "lr": 0.0001389671672119378, "grad_norm": 0.11776809394359589, "wall_ms": 1367433}
{"step": 99000, "loss": 0.21077829599380493, "loss_nce": 0.21052920818328857, "loss_mse": 0.002490810351446271, "lr": 0.0001385124619880681, "grad_norm": 0.11741834878921509, "wall_ms": 1368685}
{"step": 99100, "loss": 0.24830935895442963, "loss_nce": 0.2480609118938446, "loss_mse": 0.0024844694416970015, "lr": 0.0001380582746172195, "grad_norm": 0.1315642148256302, "wall_ms": 1369951}
{"step": 99200, "loss": 0.21354800462722778, "loss_nce": 0.21330194175243378, "loss_mse": 0.0024606310762465, "lr": 0.00013760460711851173, "grad_norm": 0.1158556342124939, "wall_ms": 1371211}
{"step": 99300, "loss": 0.21530279517173767, "loss_nce": 0.21505610644817352, "loss_mse": 0.0024669235572218895, "lr": 0.00013715146150875354, "grad_norm": 0.1207362562417984, "wall_ms": 1372454}
{"step": 99400, "loss": 0.24121637642383575, "loss_nce": 0.24096736311912537, "loss_mse": 0.0024901132564991713, "lr": 0.00013669883980243363, "grad_norm": 0.12792077660560608, "wall_ms": 1373702}
{"step": 99500, "loss": 0.22114521265029907, "loss_nce": 0.22089259326457977, "loss_mse": 0.0025261854752898216, "lr": 0.00013624674401171156, "grad_norm": 0.1161666288971901, "wall_ms": 1374953}
{"step": 99600, "loss": 0.2095032036304474, "loss_nce": 0.20925700664520264, "loss_mse": 0.002462042961269617, "lr": 0.00013579517614640898, "grad_norm": 0.12324820458889008, "wall_ms": 1376192}
{"step": 99700, "loss": 0.24088580906391144, "loss_nce": 0.24063006043434143, "loss_mse": 0.0025575130712240934, "lr": 0.00013534413821400052, "grad_norm": 0.1308315098285675, "wall_ms": 1377443}
{"step": 99800, "loss": 0.2642159163951874, "loss_nce": 0.2639612555503845, "loss_mse": 0.002546678762882948, "lr": 0.000134893632219605, "grad_norm": 0.13572323322296143, "wall_ms": 1378692}
{"step": 99900, "loss": 0.2132987678050995, "loss_nce": 0.21305084228515625, "loss_mse": 0.002479203511029482, "lr": 0.00013444366016597648, "grad_norm": 0.1166524738073349, "wall_ms": 1379938}
{"step": 100000, "loss": 0.25559329986572266, "loss_nce": 0.25534653663635254, "loss_mse": 0.002467550802975893, "lr": 0.00013399422405349545, "grad_norm": 0.1326277256011963, "wall_ms": 1381204}
{"step": 100100, "loss": 0.21651239693164825, "loss_nce": 0.21626359224319458, "loss_mse": 0.0024880580604076385, "lr": 0.00013354532588015969, "grad_norm": 0.11886835098266602, "wall_ms": 1394283}
{"step": 100200, "loss": 0.2406487613916397, "loss_nce": 0.24039888381958008, "loss_mse": 0.002498755231499672, "lr": 0.0001330969676415755, "grad_norm": 0.14682906866073608, "wall_ms": 1395537}
{"step": 100300, "loss": 0.2077072560787201, "loss_nce": 0.2074613720178604, "loss_mse": 0.0024588743690401316, "lr": 0.00013264915133094917, "grad_norm": 0.1106819435954094, "wall_ms": 1396796}
{"step": 100400, "loss": 0.226068913936615, "loss_nce": 0.2258167415857315, "loss_mse": 0.002521742135286331, "lr": 0.0001322018789390774, "grad_norm": 0.11160331219434738, "wall_ms": 1398054}
{"step": 100500, "loss": 0.2345929890871048, "loss_nce": 0.23434628546237946, "loss_mse": 0.0024670418351888657, "lr": 0.00013175515245433914, "grad_norm": 0.1197398379445076, "wall_ms": 1399318}
{"step": 100600, "loss": 0.25761500000953674, "loss_nce": 0.25736004114151, "loss_mse": 0.0025496515445411205, "lr": 0.00013130897386268639, "grad_norm": 0.1346634328365326, "wall_ms": 1400592}
{"step": 100700, "loss": 0.24736589193344116, "loss_nce": 0.24711477756500244, "loss_mse": 0.002511119470000267, "lr": 0.00013086334514763546, "grad_norm": 0.12838877737522125, "wall_ms": 1401832}
{"step": 100800, "loss": 0.23622046411037445, "loss_nce": 0.23597463965415955, "loss_mse": 0.002458233153447509, "lr": 0.00013041826829025805, "grad_norm": 0.12970568239688873, "wall_ms": 1403069}
{"step": 100900, "loss": 0.20490169525146484, "loss_nce": 0.20465663075447083, "loss_mse": 0.0024505809415131807, "lr": 0.00012997374526917282, "grad_norm": 0.11265675723552704, "wall_ms": 1404305}
{"step": 101000, "loss": 0.24249839782714844, "loss_nce": 0.2422456592321396, "loss_mse": 0.0025273843202739954, "lr": 0.00012952977806053603, "grad_norm": 0.1255521923303604, "wall_ms": 1405545}
{"step": 101100, "loss": 0.2595990300178528, "loss_nce": 0.2593478858470917, "loss_mse": 0.0025113685987889767, "lr": 0.00012908636863803318, "grad_norm": 0.1262778639793396, "wall_ms": 1406784}
{"step": 101200, "loss": 0.2359476238489151, "loss_nce": 0.23569750785827637, "loss_mse": 0.0025011999532580376, "lr": 0.00012864351897287, "grad_norm": 0.1289750635623932, "wall_ms": 1408028}
{"step": 101300, "loss": 0.21955426037311554, "loss_nce": 0.21930629014968872, "loss_mse": 0.002479695715010166, "lr": 0.00012820123103376386, "grad_norm": 0.11645591259002686, "wall_ms": 1409279}
{"step": 101400, "loss": 0.2158840149641037, "loss_nce": 0.21563567221164703, "loss_mse": 0.0024833581410348415, "lr": 0.00012775950678693492, "grad_norm": 0.12179689109325409, "wall_ms": 1410551}
{"step": 101500, "loss": 0.19803693890571594, "loss_nce": 0.19779080152511597, "loss_mse": 0.002461364259943366, "lr": 0.00012731834819609743, "grad_norm": 0.11260953545570374, "wall_ms": 1411821}
{"step": 101600, "loss": 0.22116518020629883, "loss_nce": 0.22091540694236755, "loss_mse": 0.0024977598804980516, "lr": 0.00012687775722245096, "grad_norm": 0.11995737999677658, "wall_ms": 1413089}
{"step": 101700, "loss": 0.25161516666412354, "loss_nce": 0.2513658106327057, "loss_mse": 0.002493476727977395, "lr": 0.0001264377358246716, "grad_norm": 0.13147054612636566, "wall_ms": 1414359}
{"step": 101800, "loss": 0.22477604448795319, "loss_nce": 0.2245267778635025, "loss_mse": 0.002492703963071108, "lr": 0.00012599828595890355, "grad_norm": 0.126186341047287, "wall_ms": 1415630}
{"step": 101900, "loss": 0.23999640345573425, "loss_nce": 0.23974621295928955, "loss_mse": 0.0025018942542374134, "lr": 0.00012555940957875003, "grad_norm": 0.12940502166748047, "wall_ms": 1416900}
{"step": 102000, "loss": 0.22698235511779785, "loss_nce": 0.22673778235912323, "loss_mse": 0.0024457776453346014, "lr": 0.00012512110863526507, "grad_norm": 0.11696331948041916, "wall_ms": 1418172}
{"step": 102100, "loss": 0.2404869645833969, "loss_nce": 0.24023321270942688, "loss_mse": 0.0025374540127813816, "lr": 0.00012468338507694423, "grad_norm": 0.12292957305908203, "wall_ms": 1419449}
{"step": 102200, "loss": 0.26119688153266907, "loss_nce": 0.2609463930130005, "loss_mse": 0.002504924079403281, "lr": 0.00012424624084971648, "grad_norm": 0.1321808248758316, "wall_ms": 1420709}
{"step": 102300, "loss": 0.21958588063716888, "loss_nce": 0.21933738887310028, "loss_mse": 0.002484953496605158, "lr": 0.0001238096778969352, "grad_norm": 0.13023363053798676, "wall_ms": 1421957}
{"step": 102400, "loss": 0.2169957309961319, "loss_nce": 0.21675041317939758, "loss_mse": 0.0024531397502869368, "lr": 0.00012337369815936983, "grad_norm": 0.11905986815690994, "wall_ms": 1423227}
{"step": 102500, "loss": 0.2219053953886032, "loss_nce": 0.22165794670581818, "loss_mse": 0.0024744404945522547, "lr": 0.00012293830357519694, "grad_norm": 0.12253691256046295, "wall_ms": 1424499}
{"step": 102600, "loss": 0.2542188763618469, "loss_nce": 0.2539643347263336, "loss_mse": 0.002545522293075919, "lr": 0.00012250349607999183, "grad_norm": 0.1286219209432602, "wall_ms": 1425768}
{"step": 102700, "loss": 0.23536518216133118, "loss_nce": 0.23511525988578796, "loss_mse": 0.0024992269463837147, "lr": 0.00012206927760671978, "grad_norm": 0.12862293422222137, "wall_ms": 1427037}
{"step": 102800, "loss": 0.2450464367866516, "loss_nce": 0.24479249119758606, "loss_mse": 0.002539494540542364, "lr": 0.00012163565008572777, "grad_norm": 0.1319330483675003, "wall_ms": 1428311}
{"step": 102900, "loss": 0.2590380907058716, "loss_nce": 0.2587857246398926, "loss_mse": 0.0025237894151359797, "lr": 0.00012120261544473541, "grad_norm": 0.1367601454257965, "wall_ms": 1429582}
{"step": 103000, "loss": 0.21448393166065216, "loss_nce": 0.21423259377479553, "loss_mse": 0.0025134372990578413, "lr": 0.00012077017560882673, "grad_norm": 0.11883274465799332, "wall_ms": 1430839}
{"step": 103100, "loss": 0.29404717683792114, "loss_nce": 0.2937975525856018, "loss_mse": 0.002496185479685664, "lr": 0.0001203383325004415, "grad_norm": 0.13959148526191711, "wall_ms": 1432103}
{"step": 103200, "loss": 0.2652566134929657, "loss_nce": 0.2650047838687897, "loss_mse": 0.0025183078832924366, "lr": 0.00011990708803936667, "grad_norm": 0.13120433688163757, "wall_ms": 1433364}
{"step": 103300, "loss": 0.21833536028862, "loss_nce": 0.2180849313735962, "loss_mse": 0.002504270989447832, "lr": 0.00011947644414272789, "grad_norm": 0.11283805966377258, "wall_ms": 1434623}
{"step": 103400, "loss": 0.2196773886680603, "loss_nce": 0.21942409873008728, "loss_mse": 0.002532838610932231, "lr": 0.0001190464027249809, "grad_norm": 0.11673209071159363, "wall_ms": 1435888}
{"step": 103500, "loss": 0.22199617326259613, "loss_nce": 0.22175189852714539, "loss_mse": 0.002442793920636177, "lr": 0.00011861696569790325, "grad_norm": 0.11983811110258102, "wall_ms": 1437146}
{"step": 103600, "loss": 0.21038983762264252, "loss_nce": 0.21014225482940674, "loss_mse": 0.002475823275744915, "lr": 0.00011818813497058553, "grad_norm": 0.11506108194589615, "wall_ms": 1438404}
{"step": 103700, "loss": 0.23344558477401733, "loss_nce": 0.2331966757774353, "loss_mse": 0.0024891409557312727, "lr": 0.00011775991244942284, "grad_norm": 0.12015807628631592, "wall_ms": 1439664}
{"step": 103800, "loss": 0.20118707418441772, "loss_nce": 0.20094379782676697, "loss_mse": 0.0024327021092176437, "lr": 0.00011733230003810678, "grad_norm": 0.1187390387058258, "wall_ms": 1440920}
{"step": 103900, "loss": 0.273278146982193, "loss_nce": 0.27302658557891846, "loss_mse": 0.0025157337076961994, "lr": 0.00011690529963761637, "grad_norm": 0.1393316388130188, "wall_ms": 1442172}
{"step": 104000, "loss": 0.2606537938117981, "loss_nce": 0.26040130853652954, "loss_mse": 0.0025249796453863382, "lr": 0.00011647891314621007, "grad_norm": 0.131487637758255, "wall_ms": 1443412}
{"step": 104100, "loss": 0.24291276931762695, "loss_nce": 0.242661252617836, "loss_mse": 0.002515100408345461, "lr": 0.00011605314245941702, "grad_norm": 0.12983807921409607, "wall_ms": 1444652}
{"step": 104200, "loss": 0.2639644742012024, "loss_nce": 0.26371580362319946, "loss_mse": 0.0024867570027709007, "lr": 0.00011562798947002892, "grad_norm": 0.13182547688484192, "wall_ms": 1445892}
{"step": 104300, "loss": 0.20943443477153778, "loss_nce": 0.20918531715869904, "loss_mse": 0.0024912061635404825, "lr": 0.00011520345606809133, "grad_norm": 0.11463864147663116, "wall_ms": 1447136}
{"step": 104400, "loss": 0.24258874356746674, "loss_nce": 0.24233821034431458, "loss_mse": 0.002505285432562232, "lr": 0.00011477954414089549, "grad_norm": 0.12249559909105301, "wall_ms": 1448379}
{"step": 104500, "loss": 0.2401612251996994, "loss_nce": 0.23991097509860992, "loss_mse": 0.002502442803233862, "lr": 0.00011435625557296966, "grad_norm": 0.12259282171726227, "wall_ms": 1449641}
{"step": 104600, "loss": 0.222985178232193, "loss_nce": 0.22273220121860504, "loss_mse": 0.0025296960957348347, "lr": 0.00011393359224607123, "grad_norm": 0.11309056729078293, "wall_ms": 1450908}
{"step": 104700, "loss": 0.22654080390930176, "loss_nce": 0.22629430890083313, "loss_mse": 0.002464916091412306, "lr": 0.00011351155603917779, "grad_norm": 0.11681569367647171, "wall_ms": 1452153}
{"step": 104800, "loss": 0.23074288666248322, "loss_nce": 0.23049461841583252, "loss_mse": 0.002482687821611762, "lr": 0.00011309014882847904, "grad_norm": 0.12473347038030624, "wall_ms": 1453393}
{"step": 104900, "loss": 0.22085945308208466, "loss_nce": 0.2206152379512787, "loss_mse": 0.002442215569317341, "lr": 0.0001126693724873685, "grad_norm": 0.11341565102338791, "wall_ms": 1454631}
{"step": 105000, "loss": 0.20222491025924683, "loss_nce": 0.2019783854484558, "loss_mse": 0.002465283963829279, "lr": 0.00011224922888643516, "grad_norm": 0.10971153527498245, "wall_ms": 1455872}
{"step": 105100, "loss": 0.22608591616153717, "loss_nce": 0.22583648562431335, "loss_mse": 0.002494356594979763, "lr": 0.00011182971989345491, "grad_norm": 0.12496718764305115, "wall_ms": 1457112}
{"step": 105200, "loss": 0.22146932780742645, "loss_nce": 0.2212224155664444, "loss_mse": 0.002469050930812955, "lr": 0.0001114108473733828, "grad_norm": 0.11929679661989212, "wall_ms": 1458351}
{"step": 105300, "loss": 0.23320280015468597, "loss_nce": 0.23295697569847107, "loss_mse": 0.0024582117330282927, "lr": 0.0001109926131883441, "grad_norm": 0.12278161942958832, "wall_ms": 1459590}
{"step": 105400, "loss": 0.2673824727535248, "loss_nce": 0.26713472604751587, "loss_mse": 0.0024774260818958282, "lr": 0.00011057501919762635, "grad_norm": 0.1332719624042511, "wall_ms": 1460830}
{"step": 105500, "loss": 0.24659490585327148, "loss_nce": 0.24634355306625366, "loss_mse": 0.002513549290597439, "lr": 0.0001101580672576713, "grad_norm": 0.12730403244495392, "wall_ms": 1462075}
{"step": 105600, "loss": 0.2341727912425995, "loss_nce": 0.23392081260681152, "loss_mse": 0.0025197872892022133, "lr": 0.00010974175922206622, "grad_norm": 0.14154909551143646, "wall_ms": 1463316}
{"step": 105700, "loss": 0.2464757114648819, "loss_nce": 0.24622592329978943, "loss_mse": 0.0024978986475616693, "lr": 0.00010932609694153578, "grad_norm": 0.12334682047367096, "wall_ms": 1464555}
{"step": 105800, "loss": 0.2572616934776306, "loss_nce": 0.2570097744464874, "loss_mse": 0.0025193155743181705, "lr": 0.00010891108226393414, "grad_norm": 0.1319233924150467, "wall_ms": 1465796}
{"step": 105900, "loss": 0.23791667819023132, "loss_nce": 0.23766165971755981, "loss_mse": 0.0025502026546746492, "lr": 0.00010849671703423632, "grad_norm": 0.1264808028936386, "wall_ms": 1467037}
{"step": 106000, "loss": 0.23615331947803497, "loss_nce": 0.2359025776386261, "loss_mse": 0.002507440047338605, "lr": 0.00010808300309453024, "grad_norm": 0.12485652416944504, "wall_ms": 1468276}
{"step": 106100, "loss": 0.23061098158359528, "loss_nce": 0.2303619384765625, "loss_mse": 0.002490455750375986, "lr": 0.00010766994228400843, "grad_norm": 0.11509805917739868, "wall_ms": 1469515}
{"step": 106200, "loss": 0.2452501505613327, "loss_nce": 0.24499914050102234, "loss_mse": 0.002510032383725047, "lr": 0.00010725753643896002, "grad_norm": 0.12964914739131927, "wall_ms": 1470771}
{"step": 106300, "loss": 0.21734392642974854, "loss_nce": 0.21709729731082916, "loss_mse": 0.0024663261137902737, "lr": 0.00010684578739276225, "grad_norm": 0.12403932213783264, "wall_ms": 1472034}
{"step": 106400, "loss": 0.27294570207595825, "loss_nce": 0.27269622683525085, "loss_mse": 0.002494809217751026, "lr": 0.00010643469697587256, "grad_norm": 0.14518706500530243, "wall_ms": 1473283}
{"step": 106500, "loss": 0.23452410101890564, "loss_nce": 0.2342737317085266, "loss_mse": 0.0025036674924194813, "lr": 0.00010602426701582068, "grad_norm": 0.12115678936243057, "wall_ms": 1474527}
{"step": 106600, "loss": 0.2275490015745163, "loss_nce": 0.22730784118175507, "loss_mse": 0.0024116672575473785, "lr": 0.00010561449933719992, "grad_norm": 0.11255809664726257, "wall_ms": 1475769}
{"step": 106700, "loss": 0.2265806794166565, "loss_nce": 0.22632832825183868, "loss_mse": 0.0025234695058315992, "lr": 0.00010520539576165947, "grad_norm": 0.12004205584526062, "wall_ms": 1477007}
{"step": 106800, "loss": 0.2739086151123047, "loss_nce": 0.27365630865097046, "loss_mse": 0.0025230960454791784, "lr": 0.0001047969581078962, "grad_norm": 0.1368747055530548, "wall_ms": 1478247}
{"step": 106900, "loss": 0.24186912178993225, "loss_nce": 0.2416185885667801, "loss_mse": 0.002505400450900197, "lr": 0.00010438918819164653, "grad_norm": 0.12192046642303467, "wall_ms": 1479486}
{"step": 107000, "loss": 0.2410837560892105, "loss_nce": 0.2408347725868225, "loss_mse": 0.002489803358912468, "lr": 0.00010398208782567862, "grad_norm": 0.131719172000885, "wall_ms": 1480726}
{"step": 107100, "loss": 0.2333446890115738, "loss_nce": 0.23309603333473206, "loss_mse": 0.002486563054844737, "lr": 0.0001035756588197839, "grad_norm": 0.12406033277511597, "wall_ms": 1481964}
{"step": 107200, "loss": 0.25582003593444824, "loss_nce": 0.25557008385658264, "loss_mse": 0.0024994672276079655, "lr": 0.0001031699029807693, "grad_norm": 0.13576732575893402, "wall_ms": 1483212}
{"step": 107300, "loss": 0.2215675711631775, "loss_nce": 0.22131691873073578, "loss_mse": 0.002506591845303774, "lr": 0.00010276482211244898, "grad_norm": 0.12107666581869125, "wall_ms": 1484468}
{"step": 107400, "loss": 0.25080162286758423, "loss_nce": 0.25054624676704407, "loss_mse": 0.0025537500623613596, "lr": 0.00010236041801563681, "grad_norm": 0.14207114279270172, "wall_ms": 1485730}
{"step": 107500, "loss": 0.2551945745944977, "loss_nce": 0.25494498014450073, "loss_mse": 0.002496027387678623, "lr": 0.00010195669248813775, "grad_norm": 0.13352777063846588, "wall_ms": 1486982}
{"step": 107600, "loss": 0.24908366799354553, "loss_nce": 0.2488381564617157, "loss_mse": 0.002455053385347128, "lr": 0.00010155364732474023, "grad_norm": 0.13203811645507812, "wall_ms": 1488222}
{"step": 107700, "loss": 0.23715533316135406, "loss_nce": 0.23690247535705566, "loss_mse": 0.0025285568553954363, "lr": 0.00010115128431720805, "grad_norm": 0.12369126081466675, "wall_ms": 1489461}
{"step": 107800, "loss": 0.25204363465309143, "loss_nce": 0.2517896294593811, "loss_mse": 0.0025399618316441774, "lr": 0.00010074960525427245, "grad_norm": 0.14147788286209106, "wall_ms": 1490701}
{"step": 107900, "loss": 0.25148236751556396, "loss_nce": 0.2512318789958954, "loss_mse": 0.002504941774532199, "lr": 0.00010034861192162417, "grad_norm": 0.13977168500423431, "wall_ms": 1491949}
{"step": 108000, "loss": 0.23106245696544647, "loss_nce": 0.23081842064857483, "loss_mse": 0.0024404150899499655, "lr": 9.994830610190543e-05, "grad_norm": 0.11149867624044418, "wall_ms": 1493188}
{"step": 108100, "loss": 0.25318530201911926, "loss_nce": 0.2529377341270447, "loss_mse": 0.0024755860213190317, "lr": 9.95486895747022e-05, "grad_norm": 0.14548929035663605, "wall_ms": 1494440}
{"step": 108200, "loss": 0.23500527441501617, "loss_nce": 0.2347596287727356, "loss_mse": 0.002456502290442586, "lr": 9.914976411653591e-05, "grad_norm": 0.12083642929792404, "wall_ms": 1495689}
{"step": 108300, "loss": 0.21804256737232208, "loss_nce": 0.21779637038707733, "loss_mse": 0.002461900468915701, "lr": 9.875153150085613e-05, "grad_norm": 0.1253689080476761, "wall_ms": 1496944}
{"step": 108400, "loss": 0.20922112464904785, "loss_nce": 0.2089744508266449, "loss_mse": 0.0024667074903845787, "lr": 9.835399349803215e-05, "grad_norm": 0.11886762827634811, "wall_ms": 1498193}
{"step": 108500, "loss": 0.2417028844356537, "loss_nce": 0.24144795536994934, "loss_mse": 0.0025492385029792786, "lr": 9.795715187534526e-05, "grad_norm": 0.12617209553718567, "wall_ms": 1499447}
{"step": 108600, "loss": 0.25251418352127075, "loss_nce": 0.2522619366645813, "loss_mse": 0.00252250162884593, "lr": 9.756100839698098e-05, "grad_norm": 0.13464166224002838, "wall_ms": 1500687}
{"step": 108700, "loss": 0.2249554991722107, "loss_nce": 0.2247048020362854, "loss_mse": 0.002506902674213052, "lr": 9.716556482402138e-05, "grad_norm": 0.13114379346370697, "wall_ms": 1501929}
{"step": 108800, "loss": 0.22904954850673676, "loss_nce": 0.22880110144615173, "loss_mse": 0.002484474331140518, "lr": 9.67708229144369e-05, "grad_norm": 0.1181061714887619, "wall_ms": 1503172}
{"step": 108900, "loss": 0.24325542151927948, "loss_nce": 0.2430049479007721, "loss_mse": 0.002504755510017276, "lr": 9.637678442307856e-05, "grad_norm": 0.11632013320922852, "wall_ms": 1504423}
{"step": 109000, "loss": 0.2179616242647171, "loss_nce": 0.21771875023841858, "loss_mse": 0.0024287633132189512, "lr": 9.598345110167054e-05, "grad_norm": 0.11331735551357269, "wall_ms": 1505679}
{"step": 109100, "loss": 0.24486415088176727, "loss_nce": 0.24461495876312256, "loss_mse": 0.0024919575080275536, "lr": 9.559082469880185e-05, "grad_norm": 0.13084018230438232, "wall_ms": 1506922}
{"step": 109200, "loss": 0.2441418617963791, "loss_nce": 0.24389082193374634, "loss_mse": 0.0025104728993028402, "lr": 9.519890695991924e-05, "grad_norm": 0.12139135599136353, "wall_ms": 1508162}
{"step": 109300, "loss": 0.2344423532485962, "loss_nce": 0.23419037461280823, "loss_mse": 0.002519772155210376, "lr": 9.480769962731873e-05, "grad_norm": 0.13230861723423004, "wall_ms": 1509401}
{"step": 109400, "loss": 0.2709510624408722, "loss_nce": 0.2707008719444275, "loss_mse": 0.002501758513972163, "lr": 9.441720444013828e-05, "grad_norm": 0.13957498967647552, "wall_ms": 1510639}
{"step": 109500, "loss": 0.22230547666549683, "loss_nce": 0.22205829620361328, "loss_mse": 0.002471877494826913, "lr": 9.402742313434995e-05, "grad_norm": 0.12291543930768967, "wall_ms": 1511879}
{"step": 109600, "loss": 0.28279349207878113, "loss_nce": 0.282537579536438, "loss_mse": 0.0025591240264475346, "lr": 9.36383574427522e-05, "grad_norm": 0.1571941077709198, "wall_ms": 1513126}
{"step": 109700, "loss": 0.25866568088531494, "loss_nce": 0.2584158480167389, "loss_mse": 0.0024982502218335867, "lr": 9.325000909496216e-05, "grad_norm": 0.13547055423259735, "wall_ms": 1514374}
{"step": 109800, "loss": 0.2318403720855713, "loss_nce": 0.2315889596939087, "loss_mse": 0.0025140743236988783, "lr": 9.286237981740803e-05, "grad_norm": 0.12298484891653061, "wall_ms": 1515614}
{"step": 109900, "loss": 0.22915391623973846, "loss_nce": 0.22889935970306396, "loss_mse": 0.00254549877718091, "lr": 9.247547133332129e-05, "grad_norm": 0.13517187535762787, "wall_ms": 1516881}
{"step": 110000, "loss": 0.241079181432724, "loss_nce": 0.2408301830291748, "loss_mse": 0.0024900492280721664, "lr": 9.208928536272896e-05, "grad_norm": 0.1410403698682785, "wall_ms": 1518146}
{"step": 110100, "loss": 0.2276158630847931, "loss_nce": 0.2273663878440857, "loss_mse": 0.0024947819765657187, "lr": 9.17038236224464e-05, "grad_norm": 0.12090368568897247, "wall_ms": 1532677}
{"step": 110200, "loss": 0.21345195174217224, "loss_nce": 0.21320590376853943, "loss_mse": 0.002460522809997201, "lr": 9.131908782606909e-05, "grad_norm": 0.11753109097480774, "wall_ms": 1533947}
{"step": 110300, "loss": 0.23115003108978271, "loss_nce": 0.23090064525604248, "loss_mse": 0.0024938250426203012, "lr": 9.093507968396518e-05, "grad_norm": 0.12620782852172852, "wall_ms": 1535220}
{"step": 110400, "loss": 0.23049767315387726, "loss_nce": 0.23025007545948029, "loss_mse": 0.0024759983643889427, "lr": 9.055180090326841e-05, "grad_norm": 0.1218491718173027, "wall_ms": 1536482}
{"step": 110500, "loss": 0.27720049023628235, "loss_nce": 0.27694693207740784, "loss_mse": 0.002535648411139846, "lr": 9.016925318786974e-05, "grad_norm": 0.142528235912323, "wall_ms": 1537742}
{"step": 110600, "loss": 0.2911316156387329, "loss_nce": 0.290882408618927, "loss_mse": 0.002491988008841872, "lr": 8.97874382384102e-05, "grad_norm": 0.13989630341529846, "wall_ms": 1539011}
{"step": 110700, "loss": 0.22197292745113373, "loss_nce": 0.22172510623931885, "loss_mse": 0.0024782277178019285, "lr": 8.940635775227333e-05, "grad_norm": 0.11147478222846985, "wall_ms": 1540257}
{"step": 110800, "loss": 0.22475770115852356, "loss_nce": 0.22450542449951172, "loss_mse": 0.002522772178053856, "lr": 8.902601342357746e-05, "grad_norm": 0.1174997091293335, "wall_ms": 1541515}
{"step": 110900, "loss": 0.24009516835212708, "loss_nce": 0.23984631896018982, "loss_mse": 0.002488454570993781, "lr": 8.864640694316842e-05, "grad_norm": 0.1372726559638977, "wall_ms": 1542780}
{"step": 111000, "loss": 0.23393848538398743, "loss_nce": 0.23368562757968903, "loss_mse": 0.00252862018533051, "lr": 8.826753999861169e-05, "grad_norm": 0.11782417446374893, "wall_ms": 1544052}
{"step": 111100, "loss": 0.2225712537765503, "loss_nce": 0.22232340276241302, "loss_mse": 0.002478470094501972, "lr": 8.788941427418539e-05, "grad_norm": 0.12200915813446045, "wall_ms": 1545329}
{"step": 111200, "loss": 0.21398727595806122, "loss_nce": 0.21373838186264038, "loss_mse": 0.00248894770629704, "lr": 8.751203145087226e-05, "grad_norm": 0.11916957050561905, "wall_ms": 1546589}
{"step": 111300, "loss": 0.24202795326709747, "loss_nce": 0.24177251756191254, "loss_mse": 0.002554401522502303, "lr": 8.71353932063525e-05, "grad_norm": 0.1244397982954979, "wall_ms": 1547857}
{"step": 111400, "loss": 0.24799269437789917, "loss_nce": 0.2477419376373291, "loss_mse": 0.002507612342014909, "lr": 8.675950121499621e-05, "grad_norm": 0.130677729845047, "wall_ms": 1549113}
{"step": 111500, "loss": 0.23666055500507355, "loss_nce": 0.23640868067741394, "loss_mse": 0.002518690889701247, "lr": 8.638435714785599e-05, "grad_norm": 0.12874162197113037, "wall_ms": 1550367}
{"step": 111600, "loss": 0.26208919286727905, "loss_nce": 0.2618345618247986, "loss_mse": 0.002546404954046011, "lr": 8.60099626726595e-05, "grad_norm": 0.1261587142944336, "wall_ms": 1551627}
{"step": 111700, "loss": 0.24296878278255463, "loss_nce": 0.24272018671035767, "loss_mse": 0.002485984703525901, "lr": 8.563631945380203e-05, "grad_norm": 0.14163635671138763, "wall_ms": 1552894}
{"step": 111800, "loss": 0.2062508761882782, "loss_nce": 0.20600290596485138, "loss_mse": 0.0024797464720904827, "lr": 8.526342915233903e-05, "grad_norm": 0.10804910212755203, "wall_ms": 1554150}
{"step": 111900, "loss": 0.20149949193000793, "loss_nce": 0.2012508064508438, "loss_mse": 0.0024869272019714117, "lr": 8.489129342597907e-05, "grad_norm": 0.11264150589704514, "wall_ms": 1555408}
{"step": 112000, "loss": 0.25786587595939636, "loss_nce": 0.25761812925338745, "loss_mse": 0.0024774260818958282, "lr": 8.451991392907593e-05, "grad_norm": 0.12361988425254822, "wall_ms": 1556670}
{"step": 112100, "loss": 0.23046594858169556, "loss_nce": 0.23021841049194336, "loss_mse": 0.0024753427132964134, "lr": 8.414929231262155e-05, "grad_norm": 0.1260027438402176, "wall_ms": 1557930}
{"step": 112200, "loss": 0.23471499979496002, "loss_nce": 0.23446989059448242, "loss_mse": 0.0024511581286787987, "lr": 8.377943022423886e-05, "grad_norm": 0.11764904111623764, "wall_ms": 1559176}
{"step": 112300, "loss": 0.23257571458816528, "loss_nce": 0.23232993483543396, "loss_mse": 0.0024578366428613663, "lr": 8.341032930817408e-05, "grad_norm": 0.12590469419956207, "wall_ms": 1560424}
{"step": 112400, "loss": 0.24910223484039307, "loss_nce": 0.24885082244873047, "loss_mse": 0.0025141399819403887, "lr": 8.304199120528965e-05, "grad_norm": 0.132279634475708, "wall_ms": 1561680}
{"step": 112500, "loss": 0.24988748133182526, "loss_nce": 0.24964101612567902, "loss_mse": 0.0024646047968417406, "lr": 8.267441755305682e-05, "grad_norm": 0.11924656480550766, "wall_ms": 1562931}
{"step": 112600, "loss": 0.26913484930992126, "loss_nce": 0.26888442039489746, "loss_mse": 0.0025041711051017046, "lr": 8.230760998554849e-05, "grad_norm": 0.1385858654975891, "wall_ms": 1564175}
{"step": 112700, "loss": 0.2505287230014801, "loss_nce": 0.2502749264240265, "loss_mse": 0.002537916414439678, "lr": 8.194157013343187e-05, "grad_norm": 0.13150599598884583, "wall_ms": 1565425}
{"step": 112800, "loss": 0.25360381603240967, "loss_nce": 0.2533530592918396, "loss_mse": 0.002507524099200964, "lr": 8.15762996239611e-05, "grad_norm": 0.14199231564998627, "wall_ms": 1566685}
{"step": 112900, "loss": 0.2448788285255432, "loss_nce": 0.24462811648845673, "loss_mse": 0.0025071739219129086, "lr": 8.121180008097052e-05, "grad_norm": 0.12795741856098175, "wall_ms": 1567959}
{"step": 113000, "loss": 0.24936923384666443, "loss_nce": 0.24912038445472717, "loss_mse": 0.002488476922735572, "lr": 8.084807312486669e-05, "grad_norm": 0.13001751899719238, "wall_ms": 1569208}
{"step": 113100, "loss": 0.2449132353067398, "loss_nce": 0.24466145038604736, "loss_mse": 0.002517839428037405, "lr": 8.04851203726218e-05, "grad_norm": 0.13958381116390228, "wall_ms": 1570448}
{"step": 113200, "loss": 0.2687680423259735, "loss_nce": 0.2685169577598572, "loss_mse": 0.0025108291301876307, "lr": 8.012294343776616e-05, "grad_norm": 0.13693425059318542, "wall_ms": 1571686}
{"step": 113300, "loss": 0.25227266550064087, "loss_nce": 0.25202295184135437, "loss_mse": 0.0024970814120024443, "lr": 7.976154393038126e-05, "grad_norm": 0.14612522721290588, "wall_ms": 1572924}
{"step": 113400, "loss": 0.20928561687469482, "loss_nce": 0.20904070138931274, "loss_mse": 0.002449086168780923, "lr": 7.940092345709232e-05, "grad_norm": 0.1094638928771019, "wall_ms": 1574161}
{"step": 113500, "loss": 0.25820600986480713, "loss_nce": 0.2579512298107147, "loss_mse": 0.0025477767921984196, "lr": 7.904108362106139e-05, "grad_norm": 0.139370858669281, "wall_ms": 1575399}
{"step": 113600, "loss": 0.2345312237739563, "loss_nce": 0.23428119719028473, "loss_mse": 0.0025002416223287582, "lr": 7.868202602198028e-05, "grad_norm": 0.12249815464019775, "wall_ms": 1576637}
{"step": 113700, "loss": 0.2458421289920807, "loss_nce": 0.24559171497821808, "loss_mse": 0.00250415806658566, "lr": 7.832375225606312e-05, "grad_norm": 0.13105079531669617, "wall_ms": 1577875}
{"step": 113800, "loss": 0.23719850182533264, "loss_nce": 0.2369479537010193, "loss_mse": 0.0025055529549717903, "lr": 7.796626391603943e-05, "grad_norm": 0.11850573867559433, "wall_ms": 1579115}
{"step": 113900, "loss": 0.24179445207118988, "loss_nce": 0.24154329299926758, "loss_mse": 0.002511647529900074, "lr": 7.760956259114737e-05, "grad_norm": 0.1314617395401001, "wall_ms": 1580374}
{"step": 114000, "loss": 0.24614277482032776, "loss_nce": 0.24589528143405914, "loss_mse": 0.0024749133735895157, "lr": 7.725364986712608e-05, "grad_norm": 0.13782520592212677, "wall_ms": 1581646}
{"step": 114100, "loss": 0.2432290017604828, "loss_nce": 0.24297931790351868, "loss_mse": 0.002496876288205385, "lr": 7.689852732620897e-05, "grad_norm": 0.12755189836025238, "wall_ms": 1582920}
{"step": 114200, "loss": 0.22309988737106323, "loss_nce": 0.22285348176956177, "loss_mse": 0.0024641233030706644, "lr": 7.654419654711672e-05, "grad_norm": 0.12703941762447357, "wall_ms": 1584193}
{"step": 114300, "loss": 0.21489866077899933, "loss_nce": 0.21464911103248596, "loss_mse": 0.0024955293629318476, "lr": 7.619065910505013e-05, "grad_norm": 0.11537569761276245, "wall_ms": 1585467}
{"step": 114400, "loss": 0.23755890130996704, "loss_nce": 0.23730745911598206, "loss_mse": 0.00251442170701921, "lr": 7.583791657168316e-05, "grad_norm": 0.12491784244775772, "wall_ms": 1586737}
{"step": 114500, "loss": 0.23148322105407715, "loss_nce": 0.2312328815460205, "loss_mse": 0.002503429539501667, "lr": 7.548597051515602e-05, "grad_norm": 0.13197776675224304, "wall_ms": 1587981}
{"step": 114600, "loss": 0.2265281081199646, "loss_nce": 0.22627711296081543, "loss_mse": 0.0025099758058786392, "lr": 7.513482250006795e-05, "grad_norm": 0.12868942320346832, "wall_ms": 1589225}
{"step": 114700, "loss": 0.24144724011421204, "loss_nce": 0.2411966174840927, "loss_mse": 0.0025062330532819033, "lr": 7.478447408747076e-05, "grad_norm": 0.13168415427207947, "wall_ms": 1590468}
{"step": 114800, "loss": 0.23235198855400085, "loss_nce": 0.23210695385932922, "loss_mse": 0.0024502843152731657, "lr": 7.443492683486127e-05, "grad_norm": 0.11974857747554779, "wall_ms": 1591710}
{"step": 114900, "loss": 0.24504446983337402, "loss_nce": 0.24479658901691437, "loss_mse": 0.002478796523064375, "lr": 7.408618229617488e-05, "grad_norm": 0.13620145618915558, "wall_ms": 1592949}
{"step": 115000, "loss": 0.24477949738502502, "loss_nce": 0.24452762305736542, "loss_mse": 0.002518812194466591, "lr": 7.373824202177833e-05, "grad_norm": 0.13482767343521118, "wall_ms": 1594188}
{"step": 115100, "loss": 0.21025711297988892, "loss_nce": 0.21000726521015167, "loss_mse": 0.002498515648767352, "lr": 7.339110755846307e-05, "grad_norm": 0.12854698300361633, "wall_ms": 1595428}
{"step": 115200, "loss": 0.24469296634197235, "loss_nce": 0.2444480061531067, "loss_mse": 0.0024495800025761127, "lr": 7.304478044943823e-05, "grad_norm": 0.13128817081451416, "wall_ms": 1596670}
{"step": 115300, "loss": 0.26531606912612915, "loss_nce": 0.26506873965263367, "loss_mse": 0.0024733440950512886, "lr": 7.269926223432363e-05, "grad_norm": 0.129322350025177, "wall_ms": 1597908}
{"step": 115400, "loss": 0.2082817256450653, "loss_nce": 0.20803704857826233, "loss_mse": 0.0024467166513204575, "lr": 7.235455444914348e-05, "grad_norm": 0.1170288473367691, "wall_ms": 1599147}
{"step": 115500, "loss": 0.22817598283290863, "loss_nce": 0.22793006896972656, "loss_mse": 0.002459161216393113, "lr": 7.20106586263188e-05, "grad_norm": 0.11667069792747498, "wall_ms": 1600386}
{"step": 115600, "loss": 0.26360705494880676, "loss_nce": 0.263360857963562, "loss_mse": 0.0024620736949145794, "lr": 7.166757629466107e-05, "grad_norm": 0.13785283267498016, "wall_ms": 1601628}
{"step": 115700, "loss": 0.2459590882062912, "loss_nce": 0.2457072138786316, "loss_mse": 0.002518790541216731, "lr": 7.132530897936554e-05, "grad_norm": 0.1277628093957901, "wall_ms": 1602872}
{"step": 115800, "loss": 0.22136695683002472, "loss_nce": 0.2211190164089203, "loss_mse": 0.002479457063600421, "lr": 7.0983858202004e-05, "grad_norm": 0.12455305457115173, "wall_ms": 1604154}
{"step": 115900, "loss": 0.22899788618087769, "loss_nce": 0.22874441742897034, "loss_mse": 0.0025347110349684954, "lr": 7.064322548051839e-05, "grad_norm": 0.12831686437129974, "wall_ms": 1605423}
{"step": 116000, "loss": 0.24338586628437042, "loss_nce": 0.24313832819461823, "loss_mse": 0.002475434448570013, "lr": 7.03034123292139e-05, "grad_norm": 0.13388517498970032, "wall_ms": 1606692}
{"step": 116100, "loss": 0.22410057485103607, "loss_nce": 0.2238486111164093, "loss_mse": 0.0025196343194693327, "lr": 6.996442025875229e-05, "grad_norm": 0.13123087584972382, "wall_ms": 1607958}
{"step": 116200, "loss": 0.24787141382694244, "loss_nce": 0.247624009847641, "loss_mse": 0.0024741042871028185, "lr": 6.962625077614509e-05, "grad_norm": 0.12528382241725922, "wall_ms": 1609225}
{"step": 116300, "loss": 0.23962870240211487, "loss_nce": 0.23938274383544922, "loss_mse": 0.00245960196480155, "lr": 6.928890538474702e-05, "grad_norm": 0.1358492523431778, "wall_ms": 1610501}
{"step": 116400, "loss": 0.24477124214172363, "loss_nce": 0.24452540278434753, "loss_mse": 0.002458354225382209, "lr": 6.895238558424924e-05, "grad_norm": 0.12990516424179077, "wall_ms": 1611763}
{"step": 116500, "loss": 0.23905691504478455, "loss_nce": 0.23881202936172485, "loss_mse": 0.0024489148054271936, "lr": 6.861669287067261e-05, "grad_norm": 0.12268591672182083, "wall_ms": 1613012}
{"step": 116600, "loss": 0.28601452708244324, "loss_nce": 0.28576064109802246, "loss_mse": 0.00253892969340086, "lr": 6.828182873636131e-05, "grad_norm": 0.13979265093803406, "wall_ms": 1614257}
{"step": 116700, "loss": 0.24999301135540009, "loss_nce": 0.2497454285621643, "loss_mse": 0.0024757706560194492, "lr": 6.79477946699759e-05, "grad_norm": 0.13289301097393036, "wall_ms": 1615495}
{"step": 116800, "loss": 0.1883694976568222, "loss_nce": 0.18812182545661926, "loss_mse": 0.002476717345416546, "lr": 6.761459215648675e-05, "grad_norm": 0.10287255048751831, "wall_ms": 1616738}
{"step": 116900, "loss": 0.2405836582183838, "loss_nce": 0.24033421277999878, "loss_mse": 0.0024944492615759373, "lr": 6.728222267716758e-05, "grad_norm": 0.1366211324930191, "wall_ms": 1617979}
{"step": 117000, "loss": 0.23281258344650269, "loss_nce": 0.23256558179855347, "loss_mse": 0.0024699594359844923, "lr": 6.695068770958877e-05, "grad_norm": 0.1303912103176117, "wall_ms": 1619219}
{"step": 117100, "loss": 0.2891005277633667, "loss_nce": 0.28885120153427124, "loss_mse": 0.00249322853051126, "lr": 6.661998872761094e-05, "grad_norm": 0.13497738540172577, "wall_ms": 1620458}
{"step": 117200, "loss": 0.23506510257720947, "loss_nce": 0.23481813073158264, "loss_mse": 0.0024697263725101948, "lr": 6.629012720137815e-05, "grad_norm": 0.11966636031866074, "wall_ms": 1622053}
{"step": 117300, "loss": 0.22260400652885437, "loss_nce": 0.2223469316959381, "loss_mse": 0.0025707518216222525, "lr": 6.596110459731154e-05, "grad_norm": 0.11855391412973404, "wall_ms": 1623292}
{"step": 117400, "loss": 0.24003829061985016, "loss_nce": 0.23978778719902039, "loss_mse": 0.0025051019620150328, "lr": 6.563292237810266e-05, "grad_norm": 0.12831494212150574, "wall_ms": 1624533}
{"step": 117500, "loss": 0.2249615490436554, "loss_nce": 0.22471150755882263, "loss_mse": 0.002500366885215044, "lr": 6.530558200270737e-05, "grad_norm": 0.12033634632825851, "wall_ms": 1625773}
{"step": 117600, "loss": 0.23826485872268677, "loss_nce": 0.23802044987678528, "loss_mse": 0.00244408892467618, "lr": 6.497908492633876e-05, "grad_norm": 0.12283826619386673, "wall_ms": 1627011}
{"step": 117700, "loss": 0.220778688788414, "loss_nce": 0.22052626311779022, "loss_mse": 0.0025242320261895657, "lr": 6.465343260046109e-05, "grad_norm": 0.12223368138074875, "wall_ms": 1628249}
{"step": 117800, "loss": 0.2177533507347107, "loss_nce": 0.21750734746456146, "loss_mse": 0.0024601020850241184, "lr": 6.43286264727832e-05, "grad_norm": 0.11070412397384644, "wall_ms": 1629487}
{"step": 117900, "loss": 0.2194415181875229, "loss_nce": 0.21919262409210205, "loss_mse": 0.002488987520337105, "lr": 6.400466798725212e-05, "grad_norm": 0.12948276102542877, "wall_ms": 1630730}
{"step": 118000, "loss": 0.24686691164970398, "loss_nce": 0.246618390083313, "loss_mse": 0.0024852592032402754, "lr": 6.368155858404667e-05, "grad_norm": 0.12721943855285645, "wall_ms": 1631969}
{"step": 118100, "loss": 0.22130322456359863, "loss_nce": 0.22105270624160767, "loss_mse": 0.0025051359552890062, "lr": 6.335929969957096e-05, "grad_norm": 0.1286250203847885, "wall_ms": 1633213}
{"step": 118200, "loss": 0.2173549383878708, "loss_nce": 0.2171100676059723, "loss_mse": 0.002448767889291048, "lr": 6.303789276644809e-05, "grad_norm": 0.11928651481866837, "wall_ms": 1634461}
{"step": 118300, "loss": 0.23930858075618744, "loss_nce": 0.239065021276474, "loss_mse": 0.0024356029462069273, "lr": 6.271733921351362e-05, "grad_norm": 0.12287846952676773, "wall_ms": 1635703}
{"step": 118400, "loss": 0.26933127641677856, "loss_nce": 0.2690804898738861, "loss_mse": 0.002507723867893219, "lr": 6.23976404658097e-05, "grad_norm": 0.13810612261295319, "wall_ms": 1636942}
{"step": 118500, "loss": 0.2392500638961792, "loss_nce": 0.23900115489959717, "loss_mse": 0.0024890375789254904, "lr": 6.207879794457805e-05, "grad_norm": 0.12820324301719666, "wall_ms": 1638208}
{"step": 118600, "loss": 0.21147401630878448, "loss_nce": 0.21122683584690094, "loss_mse": 0.0024717908818274736, "lr": 6.17608130672541e-05, "grad_norm": 0.10824102908372879, "wall_ms": 1639465}
{"step": 118700, "loss": 0.22028346359729767, "loss_nce": 0.22004076838493347, "loss_mse": 0.002426879946142435, "lr": 6.144368724746048e-05, "grad_norm": 0.11543789505958557, "wall_ms": 1640724}
{"step": 118800, "loss": 0.19924810528755188, "loss_nce": 0.198996439576149, "loss_mse": 0.002516666427254677, "lr": 6.112742189500104e-05, "grad_norm": 0.10928165167570114, "wall_ms": 1642000}
{"step": 118900, "loss": 0.22632643580436707, "loss_nce": 0.22608274221420288, "loss_mse": 0.002437001559883356, "lr": 6.08120184158542e-05, "grad_norm": 0.12252072244882584, "wall_ms": 1643262}
{"step": 119000, "loss": 0.2380133718252182, "loss_nce": 0.2377679944038391, "loss_mse": 0.002453844528645277, "lr": 6.049747821216686e-05, "grad_norm": 0.1232597753405571, "wall_ms": 1644521}
{"step": 119100, "loss": 0.23486806452274323, "loss_nce": 0.23462344706058502, "loss_mse": 0.002446166006848216, "lr": 6.018380268224826e-05, "grad_norm": 0.12320351600646973, "wall_ms": 1645790}
{"step": 119200, "loss": 0.21793252229690552, "loss_nce": 0.21768449246883392, "loss_mse": 0.002480258233845234, "lr": 5.987099322056347e-05, "grad_norm": 0.11289165169000626, "wall_ms": 1647048}
{"step": 119300, "loss": 0.21081525087356567, "loss_nce": 0.21057146787643433, "loss_mse": 0.002437799936160445, "lr": 5.955905121772777e-05, "grad_norm": 0.11835378408432007, "wall_ms": 1648326}
{"step": 119400, "loss": 0.21593409776687622, "loss_nce": 0.21568423509597778, "loss_mse": 0.002498689340427518, "lr": 5.924797806049975e-05, "grad_norm": 0.12148839980363846, "wall_ms": 1649593}
{"step": 119500, "loss": 0.2353401631116867, "loss_nce": 0.23509129881858826, "loss_mse": 0.002488622209057212, "lr": 5.893777513177564e-05, "grad_norm": 0.13070480525493622, "wall_ms": 1650858}
{"step": 119600, "loss": 0.231381356716156, "loss_nce": 0.23113593459129333, "loss_mse": 0.002454150468111038, "lr": 5.862844381058299e-05, "grad_norm": 0.1372678279876709, "wall_ms": 1652132}
{"step": 119700, "loss": 0.2208257019519806, "loss_nce": 0.22057977318763733, "loss_mse": 0.002459291834384203, "lr": 5.8319985472074544e-05, "grad_norm": 0.12399023026227951, "wall_ms": 1653399}
{"step": 119800, "loss": 0.2106812298297882, "loss_nce": 0.21043258905410767, "loss_mse": 0.0024863346479833126, "lr": 5.801240148752216e-05, "grad_norm": 0.11251827329397202, "wall_ms": 1654662}
{"step": 119900, "loss": 0.20259232819080353, "loss_nce": 0.20234224200248718, "loss_mse": 0.002500813687220216, "lr": 5.770569322431072e-05, "grad_norm": 0.11859897524118423, "wall_ms": 1655917}
{"step": 120000, "loss": 0.23331820964813232, "loss_nce": 0.23306705057621002, "loss_mse": 0.0025115415919572115, "lr": 5.739986204593194e-05, "grad_norm": 0.130684956908226, "wall_ms": 1657176}
{"step": 120100, "loss": 0.2495289295911789, "loss_nce": 0.24928000569343567, "loss_mse": 0.0024892480578273535, "lr": 5.709490931197857e-05, "grad_norm": 0.11854473501443863, "wall_ms": 1669947}
{"step": 120200, "loss": 0.22531992197036743, "loss_nce": 0.2250705510377884, "loss_mse": 0.0024937340058386326, "lr": 5.67908363781379e-05, "grad_norm": 0.14161579310894012, "wall_ms": 1671211}
{"step": 120300, "loss": 0.2602124810218811, "loss_nce": 0.25996214151382446, "loss_mse": 0.0025034400168806314, "lr": 5.648764459618636e-05, "grad_norm": 0.134452223777771, "wall_ms": 1672463}
{"step": 120400, "loss": 0.20632760226726532, "loss_nce": 0.20608410239219666, "loss_mse": 0.0024349335581064224, "lr": 5.618533531398293e-05, "grad_norm": 0.1096666008234024, "wall_ms": 1673700}
{"step": 120500, "loss": 0.2469954490661621, "loss_nce": 0.2467411756515503, "loss_mse": 0.002542777918279171, "lr": 5.5883909875463324e-05, "grad_norm": 0.1276489794254303, "wall_ms": 1674935}
{"step": 120600, "loss": 0.188716322183609, "loss_nce": 0.18846820294857025, "loss_mse": 0.002481167670339346, "lr": 5.558336962063434e-05, "grad_norm": 0.11256314069032669, "wall_ms": 1676171}
{"step": 120700, "loss": 0.22648638486862183, "loss_nce": 0.22623881697654724, "loss_mse": 0.0024756647180765867, "lr": 5.528371588556741e-05, "grad_norm": 0.11467577517032623, "wall_ms": 1677407}
{"step": 120800, "loss": 0.23923836648464203, "loss_nce": 0.23898805677890778, "loss_mse": 0.002503050258383155, "lr": 5.498495000239291e-05, "grad_norm": 0.1365099400281906, "wall_ms": 1678644}
{"step": 120900, "loss": 0.2365512251853943, "loss_nce": 0.23630419373512268, "loss_mse": 0.002470330335199833, "lr": 5.468707329929429e-05, "grad_norm": 0.12242010235786438, "wall_ms": 1679892}
{"step": 121000, "loss": 0.24296875298023224, "loss_nce": 0.24271459877490997, "loss_mse": 0.0025414840783923864, "lr": 5.4390087100502015e-05, "grad_norm": 0.12125976383686066, "wall_ms": 1681130}
{"step": 121100, "loss": 0.24997985363006592, "loss_nce": 0.249735489487648, "loss_mse": 0.0024436763487756252, "lr": 5.4093992726287776e-05, "grad_norm": 0.12207953631877899, "wall_ms": 1682371}
{"step": 121200, "loss": 0.22291041910648346, "loss_nce": 0.22265850007534027, "loss_mse": 0.002519182860851288, "lr": 5.379879149295865e-05, "grad_norm": 0.12149904668331146, "wall_ms": 1683609}
{"step": 121300, "loss": 0.19812136888504028, "loss_nce": 0.19787390530109406, "loss_mse": 0.002474585548043251, "lr": 5.350448471285119e-05, "grad_norm": 0.10636251419782639, "wall_ms": 1684858}
{"step": 121400, "loss": 0.22584408521652222, "loss_nce": 0.22559905052185059, "loss_mse": 0.0024503509048372507, "lr": 5.3211073694325506e-05, "grad_norm": 0.1249510794878006, "wall_ms": 1686105}
{"step": 121500, "loss": 0.22524933516979218, "loss_nce": 0.22499766945838928, "loss_mse": 0.0025166484992951155, "lr": 5.291855974175961e-05, "grad_norm": 0.12504959106445312, "wall_ms": 1687354}
{"step": 121600, "loss": 0.23186887800693512, "loss_nce": 0.23161882162094116, "loss_mse": 0.002500586211681366, "lr": 5.262694415554357e-05, "grad_norm": 0.12962652742862701, "wall_ms": 1688601}
{"step": 121700, "loss": 0.24320459365844727, "loss_nce": 0.24295508861541748, "loss_mse": 0.002495053457096219, "lr": 5.233622823207365e-05, "grad_norm": 0.13067924976348877, "wall_ms": 1689847}
{"step": 121800, "loss": 0.21949732303619385, "loss_nce": 0.21924938261508942, "loss_mse": 0.0024793485645204782, "lr": 5.204641326374666e-05, "grad_norm": 0.12057138234376907, "wall_ms": 1691097}
{"step": 121900, "loss": 0.24541166424751282, "loss_nce": 0.24516351521015167, "loss_mse": 0.0024814698845148087, "lr": 5.175750053895404e-05, "grad_norm": 0.1312132477760315, "wall_ms": 1692350}
{"step": 122000, "loss": 0.24623441696166992, "loss_nce": 0.2459840327501297, "loss_mse": 0.00250387005507946, "lr": 5.146949134207645e-05, "grad_norm": 0.14501464366912842, "wall_ms": 1693601}
{"step": 122100, "loss": 0.25143519043922424, "loss_nce": 0.251179575920105, "loss_mse": 0.002556275110691786, "lr": 5.1182386953477675e-05, "grad_norm": 0.1250041425228119, "wall_ms": 1694855}
{"step": 122200, "loss": 0.22435912489891052, "loss_nce": 0.2241126298904419, "loss_mse": 0.002464989200234413, "lr": 5.089618864949916e-05, "grad_norm": 0.11736835539340973, "wall_ms": 1696108}
{"step": 122300, "loss": 0.25114870071411133, "loss_nce": 0.25090354681015015, "loss_mse": 0.0024516787379980087, "lr": 5.061089770245441e-05, "grad_norm": 0.12393510341644287, "wall_ms": 1697358}
{"step": 122400, "loss": 0.2198837846517563, "loss_nce": 0.2196398675441742, "loss_mse": 0.0024391908664256334, "lr": 5.032651538062304e-05, "grad_norm": 0.11249539256095886, "wall_ms": 1698611}
{"step": 122500, "loss": 0.20886267721652985, "loss_nce": 0.2086161971092224, "loss_mse": 0.0024648234248161316, "lr": 5.00430429482454e-05, "grad_norm": 0.10958343744277954, "wall_ms": 1699854}
{"step": 122600, "loss": 0.19732777774333954, "loss_nce": 0.19707679748535156, "loss_mse": 0.0025097576435655355, "lr": 4.976048166551688e-05, "grad_norm": 0.11831143498420715, "wall_ms": 1701100}
{"step": 122700, "loss": 0.239949032664299, "loss_nce": 0.23969897627830505, "loss_mse": 0.002500592963770032, "lr": 4.9478832788582204e-05, "grad_norm": 0.11887343972921371, "wall_ms": 1702343}
{"step": 122800, "loss": 0.24905064702033997, "loss_nce": 0.24880264699459076, "loss_mse": 0.002480023307725787, "lr": 4.919809756953008e-05, "grad_norm": 0.13716109097003937, "wall_ms": 1703601}
{"step": 122900, "loss": 0.21305829286575317, "loss_nce": 0.21280986070632935, "loss_mse": 0.002484378172084689, "lr": 4.891827725638732e-05, "grad_norm": 0.12302755564451218, "wall_ms": 1704858}
{"step": 123000, "loss": 0.21949847042560577, "loss_nce": 0.21925166249275208, "loss_mse": 0.0024680292699486017, "lr": 4.863937309311364e-05, "grad_norm": 0.1266208440065384, "wall_ms": 1706113}
{"step": 123100, "loss": 0.2318531572818756, "loss_nce": 0.23160357773303986, "loss_mse": 0.002495823660865426, "lr": 4.8361386319595833e-05, "grad_norm": 0.1221327856183052, "wall_ms": 1707367}
{"step": 123200, "loss": 0.2249983251094818, "loss_nce": 0.22475141286849976, "loss_mse": 0.002469116123393178, "lr": 4.808431817164242e-05, "grad_norm": 0.1232595220208168, "wall_ms": 1708614}
{"step": 123300, "loss": 0.21508152782917023, "loss_nce": 0.21483685076236725, "loss_mse": 0.0024468358606100082, "lr": 4.780816988097809e-05, "grad_norm": 0.11836639791727066, "wall_ms": 1709853}
{"step": 123400, "loss": 0.24195614457130432, "loss_nce": 0.24170565605163574, "loss_mse": 0.0025049082469195127, "lr": 4.753294267523822e-05, "grad_norm": 0.12888503074645996, "wall_ms": 1711096}
{"step": 123500, "loss": 0.2473483830690384, "loss_nce": 0.24709339439868927, "loss_mse": 0.002549914875999093, "lr": 4.725863777796352e-05, "grad_norm": 0.12880726158618927, "wall_ms": 1712361}
{"step": 123600, "loss": 0.22344720363616943, "loss_nce": 0.2231992781162262, "loss_mse": 0.0024792207404971123, "lr": 4.6985256408594435e-05, "grad_norm": 0.12147971987724304, "wall_ms": 1713639}
{"step": 123700, "loss": 0.22942638397216797, "loss_nce": 0.229173481464386, "loss_mse": 0.00252900249324739, "lr": 4.671279978246582e-05, "grad_norm": 0.13775870203971863, "wall_ms": 1714917}
{"step": 123800, "loss": 0.23547248542308807, "loss_nce": 0.2352234125137329, "loss_mse": 0.002490766579285264, "lr": 4.644126911080167e-05, "grad_norm": 0.1254127025604248, "wall_ms": 1716202}
{"step": 123900, "loss": 0.22505691647529602, "loss_nce": 0.22480468451976776, "loss_mse": 0.002522332826629281, "lr": 4.6170665600709314e-05, "grad_norm": 0.11718210577964783, "wall_ms": 1717469}
{"step": 124000, "loss": 0.21951113641262054, "loss_nce": 0.2192612737417221, "loss_mse": 0.0024986539501696825, "lr": 4.590099045517461e-05, "grad_norm": 0.12015735357999802, "wall_ms": 1718730}
{"step": 124100, "loss": 0.23794083297252655, "loss_nce": 0.23769506812095642, "loss_mse": 0.00245769158937037, "lr": 4.563224487305612e-05, "grad_norm": 0.12367431074380875, "wall_ms": 1719989}
{"step": 124200, "loss": 0.23446199297904968, "loss_nce": 0.23421643674373627, "loss_mse": 0.00245550530962646, "lr": 4.536443004908002e-05, "grad_norm": 0.13930894434452057, "wall_ms": 1721260}
{"step": 124300, "loss": 0.2190794199705124, "loss_nce": 0.2188333421945572, "loss_mse": 0.002460768213495612, "lr": 4.5097547173834764e-05, "grad_norm": 0.11705072224140167, "wall_ms": 1722536}
{"step": 124400, "loss": 0.20373880863189697, "loss_nce": 0.2034890353679657, "loss_mse": 0.0024976865388453007, "lr": 4.483159743376573e-05, "grad_norm": 0.11575169116258621, "wall_ms": 1723799}
{"step": 124500, "loss": 0.23397038877010345, "loss_nce": 0.23372098803520203, "loss_mse": 0.002494054613634944, "lr": 4.4566582011170005e-05, "grad_norm": 0.11097590625286102, "wall_ms": 1725075}
{"step": 124600, "loss": 0.248483344912529, "loss_nce": 0.2482336461544037, "loss_mse": 0.002496986649930477, "lr": 4.4302502084191094e-05, "grad_norm": 0.13290618360042572, "wall_ms": 1726365}
{"step": 124700, "loss": 0.254983127117157, "loss_nce": 0.25473713874816895, "loss_mse": 0.002459937008097768, "lr": 4.403935882681371e-05, "grad_norm": 0.13202016055583954, "wall_ms": 1727671}
{"step": 124800, "loss": 0.21932704746723175, "loss_nce": 0.2190842479467392, "loss_mse": 0.00242806738242507, "lr": 4.377715340885846e-05, "grad_norm": 0.1289404332637787, "wall_ms": 1728945}
{"step": 124900, "loss": 0.2294570654630661, "loss_nce": 0.22920683026313782, "loss_mse": 0.0025023524649441242, "lr": 4.3515886995976945e-05, "grad_norm": 0.12256420403718948, "wall_ms": 1730196}
{"step": 125000, "loss": 0.22748027741909027, "loss_nce": 0.22723087668418884, "loss_mse": 0.0024940178263932467, "lr": 4.3255560749646124e-05, "grad_norm": 0.12484847754240036, "wall_ms": 1731465}
{"step": 125100, "loss": 0.21739056706428528, "loss_nce": 0.21714237332344055, "loss_mse": 0.002481973497197032, "lr": 4.2996175827163484e-05, "grad_norm": 0.11786056309938431, "wall_ms": 1732731}
{"step": 125200, "loss": 0.24531938135623932, "loss_nce": 0.2450694441795349, "loss_mse": 0.002499359892681241, "lr": 4.273773338164176e-05, "grad_norm": 0.13298259675502777, "wall_ms": 1733994}
{"step": 125300, "loss": 0.20813356339931488, "loss_nce": 0.20788556337356567, "loss_mse": 0.002480018651112914, "lr": 4.248023456200385e-05, "grad_norm": 0.11956667900085449, "wall_ms": 1735248}
{"step": 125400, "loss": 0.23740820586681366, "loss_nce": 0.23715554177761078, "loss_mse": 0.002526655327528715, "lr": 4.22236805129777e-05, "grad_norm": 0.12399647384881973, "wall_ms": 1736505}
{"step": 125500, "loss": 0.20709238946437836, "loss_nce": 0.2068464457988739, "loss_mse": 0.0024594489950686693, "lr": 4.1968072375091314e-05, "grad_norm": 0.11791178584098816, "wall_ms": 1737758}
{"step": 125600, "loss": 0.2350281924009323, "loss_nce": 0.23478087782859802, "loss_mse": 0.0024731429293751717, "lr": 4.171341128466744e-05, "grad_norm": 0.13076795637607574, "wall_ms": 1739037}
{"step": 125700, "loss": 0.22794312238693237, "loss_nce": 0.22769595682621002, "loss_mse": 0.0024717103224247694, "lr": 4.1459698373818686e-05, "grad_norm": 0.12318785488605499, "wall_ms": 1740326}
{"step": 125800, "loss": 0.2546784579753876, "loss_nce": 0.2544258236885071, "loss_mse": 0.002526418073102832, "lr": 4.120693477044263e-05, "grad_norm": 0.1318008452653885, "wall_ms": 1741610}
{"step": 125900, "loss": 0.2169303297996521, "loss_nce": 0.21668633818626404, "loss_mse": 0.0024399273097515106, "lr": 4.095512159821641e-05, "grad_norm": 0.13066460192203522, "wall_ms": 1742897}
{"step": 126000, "loss": 0.24799279868602753, "loss_nce": 0.24774937331676483, "loss_mse": 0.0024342818651348352, "lr": 4.0704259976592095e-05, "grad_norm": 0.1346234530210495, "wall_ms": 1744183}
{"step": 126100, "loss": 0.23674733936786652, "loss_nce": 0.23649805784225464, "loss_mse": 0.002492793370038271, "lr": 4.0454351020791545e-05, "grad_norm": 0.12754137814044952, "wall_ms": 1745472}
{"step": 126200, "loss": 0.22812390327453613, "loss_nce": 0.2278730273246765, "loss_mse": 0.0025088011752814054, "lr": 4.020539584180149e-05, "grad_norm": 0.11725908517837524, "wall_ms": 1746756}
{"step": 126300, "loss": 0.2226644903421402, "loss_nce": 0.22241489589214325, "loss_mse": 0.0024959042202681303, "lr": 3.995739554636856e-05, "grad_norm": 0.12151473760604858, "wall_ms": 1748046}
{"step": 126400, "loss": 0.2548319101333618, "loss_nce": 0.2545816898345947, "loss_mse": 0.0025021268520504236, "lr": 3.971035123699438e-05, "grad_norm": 0.1400008201599121, "wall_ms": 1749321}
{"step": 126500, "loss": 0.24793510138988495, "loss_nce": 0.24768434464931488, "loss_mse": 0.002507551806047559, "lr": 3.94642640119307e-05, "grad_norm": 0.13267619907855988, "wall_ms": 1750579}
{"step": 126600, "loss": 0.2236100733280182, "loss_nce": 0.22335503995418549, "loss_mse": 0.002550276927649975, "lr": 3.9219134965174465e-05, "grad_norm": 0.11832466721534729, "wall_ms": 1751840}
{"step": 126700, "loss": 0.24305978417396545, "loss_nce": 0.24280597269535065, "loss_mse": 0.0025380877777934074, "lr": 3.897496518646307e-05, "grad_norm": 0.14209160208702087, "wall_ms": 1753108}
{"step": 126800, "loss": 0.24183709919452667, "loss_nce": 0.24159659445285797, "loss_mse": 0.0024050737265497446, "lr": 3.8731755761269334e-05, "grad_norm": 0.12881170213222504, "wall_ms": 1754375}
{"step": 126900, "loss": 0.2413482517004013, "loss_nce": 0.24109461903572083, "loss_mse": 0.0025363247841596603, "lr": 3.8489507770796706e-05, "grad_norm": 0.12561944127082825, "wall_ms": 1755631}
{"step": 127000, "loss": 0.23999102413654327, "loss_nce": 0.23974251747131348, "loss_mse": 0.0024851192720234394, "lr": 3.824822229197462e-05, "grad_norm": 0.12899507582187653, "wall_ms": 1756891}
{"step": 127100, "loss": 0.22642730176448822, "loss_nce": 0.22617754340171814, "loss_mse": 0.0024975482374429703, "lr": 3.800790039745354e-05, "grad_norm": 0.1167914941906929, "wall_ms": 1758159}
{"step": 127200, "loss": 0.21240536868572235, "loss_nce": 0.21216022968292236, "loss_mse": 0.0024514091201126575, "lr": 3.7768543155600264e-05, "grad_norm": 0.11232084780931473, "wall_ms": 1759423}
{"step": 127300, "loss": 0.24649839103221893, "loss_nce": 0.24625138938426971, "loss_mse": 0.0024700025096535683, "lr": 3.7530151630493224e-05, "grad_norm": 0.12513549625873566, "wall_ms": 1760673}
{"step": 127400, "loss": 0.21544189751148224, "loss_nce": 0.2151937484741211, "loss_mse": 0.0024814216885715723, "lr": 3.7292726881917614e-05, "grad_norm": 0.12356183677911758, "wall_ms": 1761934}
{"step": 127500, "loss": 0.26177331805229187, "loss_nce": 0.26152223348617554, "loss_mse": 0.0025108670815825462, "lr": 3.705626996536075e-05, "grad_norm": 0.14105460047721863, "wall_ms": 1763187}
{"step": 127600, "loss": 0.2268482893705368, "loss_nce": 0.2265973687171936, "loss_mse": 0.002509246114641428, "lr": 3.682078193200752e-05, "grad_norm": 0.15338943898677826, "wall_ms": 1764447}
{"step": 127700, "loss": 0.2091202735900879, "loss_nce": 0.20887772738933563, "loss_mse": 0.0024254275485873222, "lr": 3.658626382873542e-05, "grad_norm": 0.11837489902973175, "wall_ms": 1765718}
{"step": 127800, "loss": 0.19848743081092834, "loss_nce": 0.19823752343654633, "loss_mse": 0.002499000634998083, "lr": 3.635271669811014e-05, "grad_norm": 0.10954549163579941, "wall_ms": 1766982}
{"step": 127900, "loss": 0.25389641523361206, "loss_nce": 0.25364699959754944, "loss_mse": 0.0024940487928688526, "lr": 3.6120141578380814e-05, "grad_norm": 0.13516154885292053, "wall_ms": 1768242}
{"step": 128000, "loss": 0.2160891890525818, "loss_nce": 0.21583960950374603, "loss_mse": 0.002495724009349942, "lr": 3.5888539503475436e-05, "grad_norm": 0.12164382636547089, "wall_ms": 1769507}
{"step": 128100, "loss": 0.20971858501434326, "loss_nce": 0.20947232842445374, "loss_mse": 0.0024624995421618223, "lr": 3.5657911502996306e-05, "grad_norm": 0.11827095597982407, "wall_ms": 1770751}
{"step": 128200, "loss": 0.22993426024913788, "loss_nce": 0.22968590259552002, "loss_mse": 0.00248354091309011, "lr": 3.542825860221539e-05, "grad_norm": 0.12330909818410873, "wall_ms": 1771997}
{"step": 128300, "loss": 0.23023289442062378, "loss_nce": 0.2299874871969223, "loss_mse": 0.0024540002923458815, "lr": 3.519958182206977e-05, "grad_norm": 0.12618376314640045, "wall_ms": 1773246}
{"step": 128400, "loss": 0.24326685070991516, "loss_nce": 0.24301785230636597, "loss_mse": 0.00248997425660491, "lr": 3.49718821791571e-05, "grad_norm": 0.12361907213926315, "wall_ms": 1774497}
{"step": 128500, "loss": 0.20953981578350067, "loss_nce": 0.209293931722641, "loss_mse": 0.0024588946253061295, "lr": 3.4745160685731214e-05, "grad_norm": 0.1231861263513565, "wall_ms": 1775737}
{"step": 128600, "loss": 0.1974286139011383, "loss_nce": 0.19718579947948456, "loss_mse": 0.0024282021913677454, "lr": 3.4519418349697386e-05, "grad_norm": 0.10824217647314072, "wall_ms": 1776979}
{"step": 128700, "loss": 0.23666682839393616, "loss_nce": 0.23641780018806458, "loss_mse": 0.0024902583099901676, "lr": 3.4294656174608044e-05, "grad_norm": 0.1266939342021942, "wall_ms": 1778221}
{"step": 128800, "loss": 0.23545871675014496, "loss_nce": 0.23520806431770325, "loss_mse": 0.00250652595423162, "lr": 3.407087515965827e-05, "grad_norm": 0.1343173235654831, "wall_ms": 1779465}
{"step": 128900, "loss": 0.20372042059898376, "loss_nce": 0.2034696638584137, "loss_mse": 0.002507501980289817, "lr": 3.3848076299681205e-05, "grad_norm": 0.1148504763841629, "wall_ms": 1780704}
{"step": 129000, "loss": 0.2228735089302063, "loss_nce": 0.22262433171272278, "loss_mse": 0.002491833409294486, "lr": 3.362626058514396e-05, "grad_norm": 0.11197404563426971, "wall_ms": 1781943}
{"step": 129100, "loss": 0.22633546590805054, "loss_nce": 0.2260863184928894, "loss_mse": 0.00249148509465158, "lr": 3.340542900214285e-05, "grad_norm": 0.1315678060054779, "wall_ms": 1783184}
{"step": 129200, "loss": 0.2262694239616394, "loss_nce": 0.22601741552352905, "loss_mse": 0.0025200501549988985, "lr": 3.318558253239925e-05, "grad_norm": 0.12390130758285522, "wall_ms": 1784426}
{"step": 129300, "loss": 0.2314116358757019, "loss_nce": 0.2311617136001587, "loss_mse": 0.002499242778867483, "lr": 3.296672215325511e-05, "grad_norm": 0.12878482043743134, "wall_ms": 1785664}
{"step": 129400, "loss": 0.21280567348003387, "loss_nce": 0.21255701780319214, "loss_mse": 0.002486559096723795, "lr": 3.274884883766858e-05, "grad_norm": 0.11968754231929779, "wall_ms": 1786906}
{"step": 129500, "loss": 0.215654194355011, "loss_nce": 0.21540585160255432, "loss_mse": 0.002483391435816884, "lr": 3.253196355420997e-05, "grad_norm": 0.11772321909666061, "wall_ms": 1788152}
{"step": 129600, "loss": 0.2373565286397934, "loss_nce": 0.2371123731136322, "loss_mse": 0.0024415578227490187, "lr": 3.2316067267056976e-05, "grad_norm": 0.13345055282115936, "wall_ms": 1789401}
{"step": 129700, "loss": 0.23196536302566528, "loss_nce": 0.231717050075531, "loss_mse": 0.0024831623304635286, "lr": 3.210116093599083e-05, "grad_norm": 0.12847472727298737, "wall_ms": 1790645}
{"step": 129800, "loss": 0.23715028166770935, "loss_nce": 0.23690012097358704, "loss_mse": 0.0025016157887876034, "lr": 3.188724551639176e-05, "grad_norm": 0.13114765286445618, "wall_ms": 1791896}
{"step": 129900, "loss": 0.21901442110538483, "loss_nce": 0.21876344084739685, "loss_mse": 0.0025098654441535473, "lr": 3.1674321959234814e-05, "grad_norm": 0.1214287206530571, "wall_ms": 1793138}
{"step": 130000, "loss": 0.23139873147010803, "loss_nce": 0.23114779591560364, "loss_mse": 0.0025093003641813993, "lr": 3.146239121108573e-05, "grad_norm": 0.12406221032142639, "wall_ms": 1794377}
{"step": 130100, "loss": 0.2626881003379822, "loss_nce": 0.26243525743484497, "loss_mse": 0.0025283959694206715, "lr": 3.125145421409658e-05, "grad_norm": 0.1366499811410904, "wall_ms": 1807442}
{"step": 130200, "loss": 0.23440077900886536, "loss_nce": 0.2341521978378296, "loss_mse": 0.002485860139131546, "lr": 3.104151190600166e-05, "grad_norm": 0.12525278329849243, "wall_ms": 1808682}
{"step": 130300, "loss": 0.22984635829925537, "loss_nce": 0.22959792613983154, "loss_mse": 0.0024843253195285797, "lr": 3.083256522011332e-05, "grad_norm": 0.12531638145446777, "wall_ms": 1809922}
{"step": 130400, "loss": 0.22702844440937042, "loss_nce": 0.2267778068780899, "loss_mse": 0.0025064239744096994, "lr": 3.0624615085317874e-05, "grad_norm": 0.11330032348632812, "wall_ms": 1811162}
{"step": 130500, "loss": 0.2377283126115799, "loss_nce": 0.23747628927230835, "loss_mse": 0.00252017704769969, "lr": 3.0417662426071323e-05, "grad_norm": 0.14072294533252716, "wall_ms": 1812402}
{"step": 130600, "loss": 0.24738745391368866, "loss_nce": 0.24714121222496033, "loss_mse": 0.0024624550715088844, "lr": 3.021170816239526e-05, "grad_norm": 0.13352885842323303, "wall_ms": 1813643}
{"step": 130700, "loss": 0.23398014903068542, "loss_nce": 0.2337295114994049, "loss_mse": 0.0025063708890229464, "lr": 3.0006753209873047e-05, "grad_norm": 0.1308949738740921, "wall_ms": 1814886}
{"step": 130800, "loss": 0.21612870693206787, "loss_nce": 0.21588170528411865, "loss_mse": 0.002469955710694194, "lr": 2.9802798479645362e-05, "grad_norm": 0.11163880676031113, "wall_ms": 1816127}
{"step": 130900, "loss": 0.21606557071208954, "loss_nce": 0.21581697463989258, "loss_mse": 0.0024860240519046783, "lr": 2.9599844878406398e-05, "grad_norm": 0.11659961938858032, "wall_ms": 1817368}
{"step": 131000, "loss": 0.19875602424144745, "loss_nce": 0.1985093057155609, "loss_mse": 0.002467257669195533, "lr": 2.939789330839972e-05, "grad_norm": 0.10430517047643661, "wall_ms": 1818609}
{"step": 131100, "loss": 0.20717483758926392, "loss_nce": 0.20693233609199524, "loss_mse": 0.0024250654969364405, "lr": 2.9196944667414385e-05, "grad_norm": 0.11147230118513107, "wall_ms": 1819853}
{"step": 131200, "loss": 0.218729168176651, "loss_nce": 0.21848182380199432, "loss_mse": 0.002473418600857258, "lr": 2.8996999848780702e-05, "grad_norm": 0.12565749883651733, "wall_ms": 1821097}
{"step": 131300, "loss": 0.27819928526878357, "loss_nce": 0.27795183658599854, "loss_mse": 0.002474586246535182, "lr": 2.8798059741366684e-05, "grad_norm": 0.13334214687347412, "wall_ms": 1822347}
{"step": 131400, "loss": 0.2540252208709717, "loss_nce": 0.253779798746109, "loss_mse": 0.002454163506627083, "lr": 2.860012522957359e-05, "grad_norm": 0.1268150508403778, "wall_ms": 1823585}
{"step": 131500, "loss": 0.21238917112350464, "loss_nce": 0.2121376097202301, "loss_mse": 0.002515661995857954, "lr": 2.840319719333234e-05, "grad_norm": 0.11172448098659515, "wall_ms": 1824826}
{"step": 131600, "loss": 0.2840699255466461, "loss_nce": 0.2838153541088104, "loss_mse": 0.0025457951705902815, "lr": 2.820727650809949e-05, "grad_norm": 0.14583344757556915, "wall_ms": 1826071}
{"step": 131700, "loss": 0.24140888452529907, "loss_nce": 0.24116337299346924, "loss_mse": 0.002455173060297966, "lr": 2.8012364044853333e-05, "grad_norm": 0.12408033758401871, "wall_ms": 1827317}
{"step": 131800, "loss": 0.22301003336906433, "loss_nce": 0.2227628529071808, "loss_mse": 0.002471841871738434, "lr": 2.7818460670090098e-05, "grad_norm": 0.11251121759414673, "wall_ms": 1828557}
{"step": 131900, "loss": 0.2571984529495239, "loss_nce": 0.25694942474365234, "loss_mse": 0.002490334678441286, "lr": 2.7625567245819957e-05, "grad_norm": 0.12602603435516357, "wall_ms": 1829817}
{"step": 132000, "loss": 0.20279774069786072, "loss_nce": 0.20254817605018616, "loss_mse": 0.0024956678971648216, "lr": 2.743368462956333e-05, "grad_norm": 0.1087329164147377, "wall_ms": 1831067}
{"step": 132100, "loss": 0.21954259276390076, "loss_nce": 0.21929726004600525, "loss_mse": 0.002453305060043931, "lr": 2.724281367434704e-05, "grad_norm": 0.11377254873514175, "wall_ms": 1832320}
{"step": 132200, "loss": 0.21364420652389526, "loss_nce": 0.2133904993534088, "loss_mse": 0.0025370586663484573, "lr": 2.7052955228700516e-05, "grad_norm": 0.12603998184204102, "wall_ms": 1833580}
{"step": 132300, "loss": 0.2580808401107788, "loss_nce": 0.25783440470695496, "loss_mse": 0.0024643619544804096, "lr": 2.6864110136651964e-05, "grad_norm": 0.13517646491527557, "wall_ms": 1834838}
{"step": 132400, "loss": 0.25995323061943054, "loss_nce": 0.2597016394138336, "loss_mse": 0.0025157637428492308, "lr": 2.6676279237724635e-05, "grad_norm": 0.1382984071969986, "wall_ms": 1836095}
{"step": 132500, "loss": 0.20456701517105103, "loss_nce": 0.20431572198867798, "loss_mse": 0.002512935781851411, "lr": 2.6489463366933212e-05, "grad_norm": 0.11403883993625641, "wall_ms": 1837363}
{"step": 132600, "loss": 0.22822020947933197, "loss_nce": 0.2279723882675171, "loss_mse": 0.0024782565888017416, "lr": 2.6303663354779925e-05, "grad_norm": 0.12057499587535858, "wall_ms": 1838634}
{"step": 132700, "loss": 0.23324012756347656, "loss_nce": 0.2329898476600647, "loss_mse": 0.0025028649251908064, "lr": 2.6118880027251005e-05, "grad_norm": 0.12400732189416885, "wall_ms": 1839897}
{"step": 132800, "loss": 0.22033479809761047, "loss_nce": 0.22008416056632996, "loss_mse": 0.00250630103982985, "lr": 2.593511420581284e-05, "grad_norm": 0.12240590900182724, "wall_ms": 1841139}
{"step": 132900, "loss": 0.2284337878227234, "loss_nce": 0.2281913310289383, "loss_mse": 0.0024246005341410637, "lr": 2.5752366707408512e-05, "grad_norm": 0.12392719089984894, "wall_ms": 1842378}
{"step": 133000, "loss": 0.26220014691352844, "loss_nce": 0.26195427775382996, "loss_mse": 0.0024586196523159742, "lr": 2.5570638344454038e-05, "grad_norm": 0.13033020496368408, "wall_ms": 1843620}
{"step": 133100, "loss": 0.22039474546909332, "loss_nce": 0.22015056014060974, "loss_mse": 0.002441912656649947, "lr": 2.5389929924834887e-05, "grad_norm": 0.11484859883785248, "wall_ms": 1844862}
{"step": 133200, "loss": 0.22019043564796448, "loss_nce": 0.21994516253471375, "loss_mse": 0.0024526629131287336, "lr": 2.521024225190219e-05, "grad_norm": 0.1177881509065628, "wall_ms": 1846114}
{"step": 133300, "loss": 0.22838518023490906, "loss_nce": 0.2281387746334076, "loss_mse": 0.002464072545990348, "lr": 2.5031576124469354e-05, "grad_norm": 0.12622497975826263, "wall_ms": 1847371}
{"step": 133400, "loss": 0.2233831137418747, "loss_nce": 0.22313827276229858, "loss_mse": 0.0024484542664140463, "lr": 2.4853932336808374e-05, "grad_norm": 0.1183406412601471, "wall_ms": 1848628}
{"step": 133500, "loss": 0.24394290149211884, "loss_nce": 0.2436976134777069, "loss_mse": 0.002452898770570755, "lr": 2.467731167864646e-05, "grad_norm": 0.12882854044437408, "wall_ms": 1849891}
{"step": 133600, "loss": 0.25744202733039856, "loss_nce": 0.25719064474105835, "loss_mse": 0.002513935323804617, "lr": 2.4501714935162342e-05, "grad_norm": 0.14016126096248627, "wall_ms": 1851150}
{"step": 133700, "loss": 0.2342870682477951, "loss_nce": 0.234039768576622, "loss_mse": 0.0024729566648602486, "lr": 2.432714288698291e-05, "grad_norm": 0.12402720004320145, "wall_ms": 1852407}
{"step": 133800, "loss": 0.22499045729637146, "loss_nce": 0.22473886609077454, "loss_mse": 0.002515872474759817, "lr": 2.4153596310179644e-05, "grad_norm": 0.12299757450819016, "wall_ms": 1853659}
{"step": 133900, "loss": 0.2254563570022583, "loss_nce": 0.22521184384822845, "loss_mse": 0.0024451701901853085, "lr": 2.3981075976265392e-05, "grad_norm": 0.12526120245456696, "wall_ms": 1854912}
{"step": 134000, "loss": 0.21308517456054688, "loss_nce": 0.21283715963363647, "loss_mse": 0.002480132272467017, "lr": 2.380958265219062e-05, "grad_norm": 0.1260845959186554, "wall_ms": 1856164}
{"step": 134100, "loss": 0.27676117420196533, "loss_nce": 0.2765042781829834, "loss_mse": 0.002569016069173813, "lr": 2.3639117100340135e-05, "grad_norm": 0.13252443075180054, "wall_ms": 1857425}
{"step": 134200, "loss": 0.2698446810245514, "loss_nce": 0.26959311962127686, "loss_mse": 0.0025154685135930777, "lr": 2.3469680078529846e-05, "grad_norm": 0.1342514604330063, "wall_ms": 1858707}
{"step": 134300, "loss": 0.23369178175926208, "loss_nce": 0.23344305157661438, "loss_mse": 0.002487300196662545, "lr": 2.3301272340003172e-05, "grad_norm": 0.12328741699457169, "wall_ms": 1859990}
{"step": 134400, "loss": 0.24755211174488068, "loss_nce": 0.2473064661026001, "loss_mse": 0.0024564608465880156, "lr": 2.3133894633427785e-05, "grad_norm": 0.12580756843090057, "wall_ms": 1861279}
{"step": 134500, "loss": 0.21849319338798523, "loss_nce": 0.21824181079864502, "loss_mse": 0.0025138934142887592, "lr": 2.2967547702892316e-05, "grad_norm": 0.12401178479194641, "wall_ms": 1862553}
{"step": 134600, "loss": 0.22260427474975586, "loss_nce": 0.2223527729511261, "loss_mse": 0.0025150624569505453, "lr": 2.2802232287902986e-05, "grad_norm": 0.13297440111637115, "wall_ms": 1863827}
{"step": 134700, "loss": 0.21335329115390778, "loss_nce": 0.21310484409332275, "loss_mse": 0.002484498079866171, "lr": 2.2637949123380357e-05, "grad_norm": 0.10941106081008911, "wall_ms": 1865099}
{"step": 134800, "loss": 0.22609397768974304, "loss_nce": 0.22584423422813416, "loss_mse": 0.002497401786968112, "lr": 2.247469893965609e-05, "grad_norm": 0.12397415190935135, "wall_ms": 1866365}
{"step": 134900, "loss": 0.20765620470046997, "loss_nce": 0.20741558074951172, "loss_mse": 0.0024061959702521563, "lr": 2.2312482462469538e-05, "grad_norm": 0.11634010821580887, "wall_ms": 1867631}
{"step": 135000, "loss": 0.2451612502336502, "loss_nce": 0.2449120283126831, "loss_mse": 0.0024922508746385574, "lr": 2.2151300412964855e-05, "grad_norm": 0.13084597885608673, "wall_ms": 1868910}
{"step": 135100, "loss": 0.23583683371543884, "loss_nce": 0.23558682203292847, "loss_mse": 0.002500076312571764, "lr": 2.1991153507687386e-05, "grad_norm": 0.12859053909778595, "wall_ms": 1870171}
{"step": 135200, "loss": 0.22984682023525238, "loss_nce": 0.22959452867507935, "loss_mse": 0.002522927476093173, "lr": 2.1832042458580743e-05, "grad_norm": 0.12165560573339462, "wall_ms": 1871440}
{"step": 135300, "loss": 0.21143969893455505, "loss_nce": 0.21119379997253418, "loss_mse": 0.002458924427628517, "lr": 2.1673967972983612e-05, "grad_norm": 0.11026855558156967, "wall_ms": 1872705}
{"step": 135400, "loss": 0.24473825097084045, "loss_nce": 0.2444920539855957, "loss_mse": 0.0024619169998914003, "lr": 2.151693075362648e-05, "grad_norm": 0.12855254113674164, "wall_ms": 1873977}
{"step": 135500, "loss": 0.2355288714170456, "loss_nce": 0.23527845740318298, "loss_mse": 0.0025041289627552032, "lr": 2.136093149862866e-05, "grad_norm": 0.127303808927536, "wall_ms": 1875252}
{"step": 135600, "loss": 0.26347965002059937, "loss_nce": 0.26323211193084717, "loss_mse": 0.0024755066260695457, "lr": 2.120597090149508e-05, "grad_norm": 0.1303653120994568, "wall_ms": 1876530}
{"step": 135700, "loss": 0.2503848671913147, "loss_nce": 0.2501365840435028, "loss_mse": 0.002482845913618803, "lr": 2.1052049651113375e-05, "grad_norm": 0.13533993065357208, "wall_ms": 1877805}
{"step": 135800, "loss": 0.24274328351020813, "loss_nce": 0.2424871027469635, "loss_mse": 0.0025617647916078568, "lr": 2.08991684317505e-05, "grad_norm": 0.12682542204856873, "wall_ms": 1879074}
{"step": 135900, "loss": 0.21148310601711273, "loss_nce": 0.21123431622982025, "loss_mse": 0.0024878846015781164, "lr": 2.0747327923050056e-05, "grad_norm": 0.12288873642683029, "wall_ms": 1880337}
{"step": 136000, "loss": 0.2526217997074127, "loss_nce": 0.25237324833869934, "loss_mse": 0.0024853739887475967, "lr": 2.0596528800028997e-05, "grad_norm": 0.12388380616903305, "wall_ms": 1881601}
{"step": 136100, "loss": 0.2072513997554779, "loss_nce": 0.20700937509536743, "loss_mse": 0.0024202601052820683, "lr": 2.0446771733074737e-05, "grad_norm": 0.11697462946176529, "wall_ms": 1882867}
{"step": 136200, "loss": 0.24730610847473145, "loss_nce": 0.24705612659454346, "loss_mse": 0.0024998104199767113, "lr": 2.0298057387942198e-05, "grad_norm": 0.12489888817071915, "wall_ms": 1884130}
{"step": 136300, "loss": 0.2340739667415619, "loss_nce": 0.23382791876792908, "loss_mse": 0.0024604718200862408, "lr": 2.015038642575079e-05, "grad_norm": 0.13371822237968445, "wall_ms": 1885394}
{"step": 136400, "loss": 0.28702715039253235, "loss_nce": 0.2867775559425354, "loss_mse": 0.0024960192386060953, "lr": 2.000375950298148e-05, "grad_norm": 0.15211792290210724, "wall_ms": 1886654}
{"step": 136500, "loss": 0.2292279452085495, "loss_nce": 0.22897984087467194, "loss_mse": 0.0024810186587274075, "lr": 1.9858177271473947e-05, "grad_norm": 0.11908487975597382, "wall_ms": 1887915}
{"step": 136600, "loss": 0.2749916613101959, "loss_nce": 0.2747371792793274, "loss_mse": 0.0025447311345487833, "lr": 1.9713640378423577e-05, "grad_norm": 0.1375453919172287, "wall_ms": 1889176}
{"step": 136700, "loss": 0.21248821914196014, "loss_nce": 0.21224361658096313, "loss_mse": 0.0024460707791149616, "lr": 1.957014946637858e-05, "grad_norm": 0.11720257252454758, "wall_ms": 1890436}
{"step": 136800, "loss": 0.23494111001491547, "loss_nce": 0.2346923053264618, "loss_mse": 0.0024881171993911266, "lr": 1.942770517323735e-05, "grad_norm": 0.12906084954738617, "wall_ms": 1891698}
{"step": 136900, "loss": 0.22409126162528992, "loss_nce": 0.22384852170944214, "loss_mse": 0.0024273248855024576, "lr": 1.9286308132245344e-05, "grad_norm": 0.11994355171918869, "wall_ms": 1892975}
{"step": 137000, "loss": 0.22934332489967346, "loss_nce": 0.22909227013587952, "loss_mse": 0.0025105448439717293, "lr": 1.9145958971992375e-05, "grad_norm": 0.1282142996788025, "wall_ms": 1894270}
{"step": 137100, "loss": 0.266160249710083, "loss_nce": 0.26591217517852783, "loss_mse": 0.002480743918567896, "lr": 1.900665831640993e-05, "grad_norm": 0.1443452686071396, "wall_ms": 1895562}
{"step": 137200, "loss": 0.20844218134880066, "loss_nce": 0.20819352567195892, "loss_mse": 0.002486555138602853, "lr": 1.8868406784768233e-05, "grad_norm": 0.11478865146636963, "wall_ms": 1896848}
{"step": 137300, "loss": 0.19805854558944702, "loss_nce": 0.1978113055229187, "loss_mse": 0.0024723652750253677, "lr": 1.8731204991673515e-05, "grad_norm": 0.11250732839107513, "wall_ms": 1898141}
{"step": 137400, "loss": 0.21147210896015167, "loss_nce": 0.2112230807542801, "loss_mse": 0.002490233164280653, "lr": 1.8595053547065484e-05, "grad_norm": 0.11913681775331497, "wall_ms": 1899427}
{"step": 137500, "loss": 0.21550631523132324, "loss_nce": 0.21525490283966064, "loss_mse": 0.0025141327641904354, "lr": 1.845995305621429e-05, "grad_norm": 0.1159200668334961, "wall_ms": 1900732}
{"step": 137600, "loss": 0.23724091053009033, "loss_nce": 0.236993670463562, "loss_mse": 0.00247234501875937, "lr": 1.8325904119718032e-05, "grad_norm": 0.1281382292509079, "wall_ms": 1901976}
{"step": 137700, "loss": 0.24512821435928345, "loss_nce": 0.24488011002540588, "loss_mse": 0.0024810393806546926, "lr": 1.8192907333500188e-05, "grad_norm": 0.1318299025297165, "wall_ms": 1903220}
{"step": 137800, "loss": 0.2318413108587265, "loss_nce": 0.23159386217594147, "loss_mse": 0.002474484033882618, "lr": 1.8060963288806638e-05, "grad_norm": 0.12190816551446915, "wall_ms": 1904461}
{"step": 137900, "loss": 0.22001193463802338, "loss_nce": 0.2197653353214264, "loss_mse": 0.002465945202857256, "lr": 1.793007257220334e-05, "grad_norm": 0.12838785350322723, "wall_ms": 1905705}
{"step": 138000, "loss": 0.20902466773986816, "loss_nce": 0.20877951383590698, "loss_mse": 0.002451509004458785, "lr": 1.7800235765573618e-05, "grad_norm": 0.12099099904298782, "wall_ms": 1906949}
{"step": 138100, "loss": 0.21405985951423645, "loss_nce": 0.21381138265132904, "loss_mse": 0.0024848307948559523, "lr": 1.7671453446115502e-05, "grad_norm": 0.12130287289619446, "wall_ms": 1908198}
{"step": 138200, "loss": 0.23004159331321716, "loss_nce": 0.2297939658164978, "loss_mse": 0.0024763101246207952, "lr": 1.754372618633933e-05, "grad_norm": 0.13624514639377594, "wall_ms": 1909443}
{"step": 138300, "loss": 0.267846941947937, "loss_nce": 0.26759588718414307, "loss_mse": 0.002510427264496684, "lr": 1.7417054554065024e-05, "grad_norm": 0.13818638026714325, "wall_ms": 1910687}
{"step": 138400, "loss": 0.2353551834821701, "loss_nce": 0.23510462045669556, "loss_mse": 0.0025056444574147463, "lr": 1.7291439112419624e-05, "grad_norm": 0.12307069450616837, "wall_ms": 1911957}
{"step": 138500, "loss": 0.2176610380411148, "loss_nce": 0.21741175651550293, "loss_mse": 0.0024928483180701733, "lr": 1.7166880419834915e-05, "grad_norm": 0.1098833978176117, "wall_ms": 1913204}
{"step": 138600, "loss": 0.25726357102394104, "loss_nce": 0.2570146918296814, "loss_mse": 0.002488780068233609, "lr": 1.7043379030044702e-05, "grad_norm": 0.1354227215051651, "wall_ms": 1914442}
{"step": 138700, "loss": 0.22502487897872925, "loss_nce": 0.2247757762670517, "loss_mse": 0.00249098869971931, "lr": 1.692093549208262e-05, "grad_norm": 0.13164398074150085, "wall_ms": 1915679}
{"step": 138800, "loss": 0.2185666263103485, "loss_nce": 0.2183193415403366, "loss_mse": 0.002472847932949662, "lr": 1.6799550350279414e-05, "grad_norm": 0.11562614887952805, "wall_ms": 1916918}
{"step": 138900, "loss": 0.22561904788017273, "loss_nce": 0.22537142038345337, "loss_mse": 0.0024762386456131935, "lr": 1.6679224144260763e-05, "grad_norm": 0.12726113200187683, "wall_ms": 1918163}
{"step": 139000, "loss": 0.23886628448963165, "loss_nce": 0.23861560225486755, "loss_mse": 0.002506859600543976, "lr": 1.655995740894471e-05, "grad_norm": 0.11926616728305817, "wall_ms": 1919406}
{"step": 139100, "loss": 0.253744512796402, "loss_nce": 0.253495454788208, "loss_mse": 0.0024904822930693626, "lr": 1.644175067453941e-05, "grad_norm": 0.13795170187950134, "wall_ms": 1920648}
{"step": 139200, "loss": 0.23553305864334106, "loss_nce": 0.23528534173965454, "loss_mse": 0.002477144356817007, "lr": 1.6324604466540683e-05, "grad_norm": 0.1366184502840042, "wall_ms": 1921921}
{"step": 139300, "loss": 0.22096861898899078, "loss_nce": 0.22072139382362366, "loss_mse": 0.002472259569913149, "lr": 1.6208519305729703e-05, "grad_norm": 0.11289890855550766, "wall_ms": 1923202}
{"step": 139400, "loss": 0.21976426243782043, "loss_nce": 0.21951650083065033, "loss_mse": 0.0024775671772658825, "lr": 1.6093495708170685e-05, "grad_norm": 0.13110162317752838, "wall_ms": 1924478}
{"step": 139500, "loss": 0.215211421251297, "loss_nce": 0.2149634063243866, "loss_mse": 0.0024801662657409906, "lr": 1.5979534185208604e-05, "grad_norm": 0.11512964963912964, "wall_ms": 1925747}
{"step": 139600, "loss": 0.25544220209121704, "loss_nce": 0.25519001483917236, "loss_mse": 0.002521856687963009, "lr": 1.5866635243466923e-05, "grad_norm": 0.13143518567085266, "wall_ms": 1927018}
{"step": 139700, "loss": 0.26301947236061096, "loss_nce": 0.26277390122413635, "loss_mse": 0.0024557183496654034, "lr": 1.5754799384845345e-05, "grad_norm": 0.12852151691913605, "wall_ms": 1928295}
{"step": 139800, "loss": 0.2330114096403122, "loss_nce": 0.23276430368423462, "loss_mse": 0.0024710700381547213, "lr": 1.5644027106517515e-05, "grad_norm": 0.12382189184427261, "wall_ms": 1929585}
{"step": 139900, "loss": 0.21208447217941284, "loss_nce": 0.21183769404888153, "loss_mse": 0.0024677743203938007, "lr": 1.5534318900928905e-05, "grad_norm": 0.11637057363986969, "wall_ms": 1930868}
{"step": 140000, "loss": 0.24567827582359314, "loss_nce": 0.2454293817281723, "loss_mse": 0.0024889518972486258, "lr": 1.5425675255794584e-05, "grad_norm": 0.13403667509555817, "wall_ms": 1932117}
{"step": 140100, "loss": 0.22269222140312195, "loss_nce": 0.22244420647621155, "loss_mse": 0.002480089431628585, "lr": 1.5318096654096976e-05, "grad_norm": 0.1226760745048523, "wall_ms": 1944372}
{"step": 140200, "loss": 0.21625323593616486, "loss_nce": 0.21600741147994995, "loss_mse": 0.002458172384649515, "lr": 1.5211583574083892e-05, "grad_norm": 0.11729899048805237, "wall_ms": 1945618}
{"step": 140300, "loss": 0.2663312554359436, "loss_nce": 0.26607969403266907, "loss_mse": 0.002515618223696947, "lr": 1.5106136489266233e-05, "grad_norm": 0.12592019140720367, "wall_ms": 1946866}
{"step": 140400, "loss": 0.25933143496513367, "loss_nce": 0.2590779662132263, "loss_mse": 0.002534609753638506, "lr": 1.50017558684159e-05, "grad_norm": 0.13398374617099762, "wall_ms": 1948110}
{"step": 140500, "loss": 0.26208582520484924, "loss_nce": 0.2618369460105896, "loss_mse": 0.0024887025356292725, "lr": 1.4898442175563888e-05, "grad_norm": 0.13503843545913696, "wall_ms": 1949347}
{"step": 140600, "loss": 0.2400626689195633, "loss_nce": 0.23981434106826782, "loss_mse": 0.002483209129422903, "lr": 1.4796195869998034e-05, "grad_norm": 0.1344202607870102, "wall_ms": 1950591}
{"step": 140700, "loss": 0.24664832651615143, "loss_nce": 0.24639716744422913, "loss_mse": 0.0025116351898759604, "lr": 1.4695017406260963e-05, "grad_norm": 0.13583223521709442, "wall_ms": 1952175}
{"step": 140800, "loss": 0.23106485605239868, "loss_nce": 0.2308165729045868, "loss_mse": 0.0024827804882079363, "lr": 1.4594907234148174e-05, "grad_norm": 0.12462904304265976, "wall_ms": 1953421}
{"step": 140900, "loss": 0.207895427942276, "loss_nce": 0.2076457142829895, "loss_mse": 0.002497077686712146, "lr": 1.449586579870613e-05, "grad_norm": 0.11265287548303604, "wall_ms": 1954676}
{"step": 141000, "loss": 0.2553400695323944, "loss_nce": 0.2550877332687378, "loss_mse": 0.0025234476197510958, "lr": 1.4397893540230015e-05, "grad_norm": 0.13581621646881104, "wall_ms": 1955918}
{"step": 141100, "loss": 0.20879536867141724, "loss_nce": 0.2085493803024292, "loss_mse": 0.002459830604493618, "lr": 1.4300990894261907e-05, "grad_norm": 0.1204482838511467, "wall_ms": 1957159}
{"step": 141200, "loss": 0.22138020396232605, "loss_nce": 0.22112959623336792, "loss_mse": 0.002506042830646038, "lr": 1.4205158291588964e-05, "grad_norm": 0.12893767654895782, "wall_ms": 1958401}
{"step": 141300, "loss": 0.23647288978099823, "loss_nce": 0.23622457683086395, "loss_mse": 0.0024830906186252832, "lr": 1.4110396158241286e-05, "grad_norm": 0.1284753680229187, "wall_ms": 1959642}
{"step": 141400, "loss": 0.22841787338256836, "loss_nce": 0.22816815972328186, "loss_mse": 0.0024970858357846737, "lr": 1.4016704915490192e-05, "grad_norm": 0.12634111940860748, "wall_ms": 1960885}
{"step": 141500, "loss": 0.2603078782558441, "loss_nce": 0.2600569427013397, "loss_mse": 0.0025093231815844774, "lr": 1.3924084979846274e-05, "grad_norm": 0.13156482577323914, "wall_ms": 1962129}
{"step": 141600, "loss": 0.22182397544384003, "loss_nce": 0.22157667577266693, "loss_mse": 0.0024729808792471886, "lr": 1.3832536763057559e-05, "grad_norm": 0.12174489349126816, "wall_ms": 1963372}
{"step": 141700, "loss": 0.23290537297725677, "loss_nce": 0.2326551079750061, "loss_mse": 0.002502627205103636, "lr": 1.3742060672107678e-05, "grad_norm": 0.11423052102327347, "wall_ms": 1964630}
{"step": 141800, "loss": 0.22370968759059906, "loss_nce": 0.22346389293670654, "loss_mse": 0.0024578759912401438, "lr": 1.365265710921405e-05, "grad_norm": 0.11490073055028915, "wall_ms": 1965913}
{"step": 141900, "loss": 0.21887105703353882, "loss_nce": 0.21862506866455078, "loss_mse": 0.0024599090684205294, "lr": 1.356432647182608e-05, "grad_norm": 0.11958448588848114, "wall_ms": 1967195}
{"step": 142000, "loss": 0.21741987764835358, "loss_nce": 0.21717415750026703, "loss_mse": 0.0024572296533733606, "lr": 1.347706915262343e-05, "grad_norm": 0.1243332028388977, "wall_ms": 1968468}
{"step": 142100, "loss": 0.22714513540267944, "loss_nce": 0.22689811885356903, "loss_mse": 0.0024702271912246943, "lr": 1.3390885539514283e-05, "grad_norm": 0.13189247250556946, "wall_ms": 1969735}
{"step": 142200, "loss": 0.2731291651725769, "loss_nce": 0.27287620306015015, "loss_mse": 0.002529755001887679, "lr": 1.3305776015633551e-05, "grad_norm": 0.1380726397037506, "wall_ms": 1970998}
{"step": 142300, "loss": 0.23452328145503998, "loss_nce": 0.23427343368530273, "loss_mse": 0.002498420886695385, "lr": 1.3221740959341242e-05, "grad_norm": 0.1251005083322525, "wall_ms": 1972251}
{"step": 142400, "loss": 0.23414160311222076, "loss_nce": 0.23389360308647156, "loss_mse": 0.0024800070095807314, "lr": 1.3138780744220746e-05, "grad_norm": 0.12976250052452087, "wall_ms": 1973494}
{"step": 142500, "loss": 0.24702201783657074, "loss_nce": 0.24677535891532898, "loss_mse": 0.0024666429962962866, "lr": 1.3056895739077109e-05, "grad_norm": 0.12575654685497284, "wall_ms": 1974736}
{"step": 142600, "loss": 0.22722437977790833, "loss_nce": 0.22697824239730835, "loss_mse": 0.002461363561451435, "lr": 1.2976086307935565e-05, "grad_norm": 0.11825746297836304, "wall_ms": 1975985}
{"step": 142700, "loss": 0.23501679301261902, "loss_nce": 0.23476819694042206, "loss_mse": 0.0024860044941306114, "lr": 1.2896352810039735e-05, "grad_norm": 0.1269513964653015, "wall_ms": 1977233}
{"step": 142800, "loss": 0.24003541469573975, "loss_nce": 0.23978739976882935, "loss_mse": 0.0024801043327897787, "lr": 1.2817695599850131e-05, "grad_norm": 0.11670839041471481, "wall_ms": 1978482}
{"step": 142900, "loss": 0.23804843425750732, "loss_nce": 0.23780159652233124, "loss_mse": 0.0024683892261236906, "lr": 1.2740115027042594e-05, "grad_norm": 0.13980528712272644, "wall_ms": 1979723}
{"step": 143000, "loss": 0.23104062676429749, "loss_nce": 0.23079490661621094, "loss_mse": 0.00245716143399477, "lr": 1.2663611436506632e-05, "grad_norm": 0.12598097324371338, "wall_ms": 1980962}
{"step": 143100, "loss": 0.2455485612154007, "loss_nce": 0.245292529463768, "loss_mse": 0.0025603335816413164, "lr": 1.2588185168344003e-05, "grad_norm": 0.13091741502285004, "wall_ms": 1982205}
{"step": 143200, "loss": 0.21376760303974152, "loss_nce": 0.21352702379226685, "loss_mse": 0.0024057866539806128, "lr": 1.2513836557867131e-05, "grad_norm": 0.12468662112951279, "wall_ms": 1983450}
{"step": 143300, "loss": 0.19546130299568176, "loss_nce": 0.1952154040336609, "loss_mse": 0.0024589707609266043, "lr": 1.244056593559772e-05, "grad_norm": 0.1040220856666565, "wall_ms": 1984691}
{"step": 143400, "loss": 0.2280956655740738, "loss_nce": 0.2278471738100052, "loss_mse": 0.0024849707260727882, "lr": 1.2368373627265107e-05, "grad_norm": 0.12031874060630798, "wall_ms": 1985932}
{"step": 143500, "loss": 0.23261761665344238, "loss_nce": 0.23236846923828125, "loss_mse": 0.0024915477260947227, "lr": 1.2297259953804956e-05, "grad_norm": 0.12389998137950897, "wall_ms": 1987180}
{"step": 143600, "loss": 0.21652300655841827, "loss_nce": 0.2162771224975586, "loss_mse": 0.0024589041713625193, "lr": 1.2227225231357765e-05, "grad_norm": 0.10928698629140854, "wall_ms": 1988446}
{"step": 143700, "loss": 0.2502976953983307, "loss_nce": 0.25004842877388, "loss_mse": 0.0024925379548221827, "lr": 1.215826977126755e-05, "grad_norm": 0.1292482614517212, "wall_ms": 1989697}
{"step": 143800, "loss": 0.21255305409431458, "loss_nce": 0.21230006217956543, "loss_mse": 0.00252998317591846, "lr": 1.2090393880080306e-05, "grad_norm": 0.11934549361467361, "wall_ms": 1990939}
{"step": 143900, "loss": 0.23774197697639465, "loss_nce": 0.23749570548534393, "loss_mse": 0.00246264785528183, "lr": 1.2023597859542778e-05, "grad_norm": 0.1371724009513855, "wall_ms": 1992180}
{"step": 144000, "loss": 0.205282062292099, "loss_nce": 0.2050272673368454, "loss_mse": 0.002547919051721692, "lr": 1.1957882006601099e-05, "grad_norm": 0.1151367574930191, "wall_ms": 1993420}
{"step": 144100, "loss": 0.24529199302196503, "loss_nce": 0.24504053592681885, "loss_mse": 0.0025146387051790953, "lr": 1.1893246613399433e-05, "grad_norm": 0.12776696681976318, "wall_ms": 1994663}
{"step": 144200, "loss": 0.21452246606349945, "loss_nce": 0.21426689624786377, "loss_mse": 0.002555709332227707, "lr": 1.1829691967278613e-05, "grad_norm": 0.11633110791444778, "wall_ms": 1995916}
{"step": 144300, "loss": 0.24163123965263367, "loss_nce": 0.24137867987155914, "loss_mse": 0.0025256178341805935, "lr": 1.1767218350775084e-05, "grad_norm": 0.1395653486251831, "wall_ms": 1997166}
{"step": 144400, "loss": 0.23066522181034088, "loss_nce": 0.23041877150535583, "loss_mse": 0.002464457880705595, "lr": 1.1705826041619372e-05, "grad_norm": 0.12512138485908508, "wall_ms": 1998428}
{"step": 144500, "loss": 0.1956569403409958, "loss_nce": 0.19541296362876892, "loss_mse": 0.002439767587929964, "lr": 1.1645515312735035e-05, "grad_norm": 0.11209730803966522, "wall_ms": 1999689}
{"step": 144600, "loss": 0.21273142099380493, "loss_nce": 0.2124851644039154, "loss_mse": 0.0024625305086374283, "lr": 1.1586286432237424e-05, "grad_norm": 0.11400073021650314, "wall_ms": 2000936}
{"step": 144700, "loss": 0.23203159868717194, "loss_nce": 0.23177845776081085, "loss_mse": 0.0025314041413366795, "lr": 1.1528139663432368e-05, "grad_norm": 0.1262088119983673, "wall_ms": 2002202}
{"step": 144800, "loss": 0.22705376148223877, "loss_nce": 0.22680653631687164, "loss_mse": 0.0024722320958971977, "lr": 1.1471075264815204e-05, "grad_norm": 0.11873861402273178, "wall_ms": 2003447}
{"step": 144900, "loss": 0.22967202961444855, "loss_nce": 0.22942432761192322, "loss_mse": 0.002477068454027176, "lr": 1.1415093490069456e-05, "grad_norm": 0.131086528301239, "wall_ms": 2004703}
{"step": 145000, "loss": 0.2258954793214798, "loss_nce": 0.2256489247083664, "loss_mse": 0.00246558990329504, "lr": 1.1360194588065799e-05, "grad_norm": 0.12227047979831696, "wall_ms": 2005965}
{"step": 145100, "loss": 0.20710648596286774, "loss_nce": 0.20685680210590363, "loss_mse": 0.0024968667421489954, "lr": 1.130637880286091e-05, "grad_norm": 0.10659901797771454, "wall_ms": 2007216}
{"step": 145200, "loss": 0.21773891150951385, "loss_nce": 0.21749337017536163, "loss_mse": 0.002455452224239707, "lr": 1.1253646373696394e-05, "grad_norm": 0.12072473764419556, "wall_ms": 2008475}
{"step": 145300, "loss": 0.24378575384616852, "loss_nce": 0.24353551864624023, "loss_mse": 0.002502372721210122, "lr": 1.1201997534997762e-05, "grad_norm": 0.12994515895843506, "wall_ms": 2009720}
{"step": 145400, "loss": 0.21622486412525177, "loss_nce": 0.2159787118434906, "loss_mse": 0.002461487427353859, "lr": 1.1151432516373298e-05, "grad_norm": 0.1222911924123764, "wall_ms": 2010985}
{"step": 145500, "loss": 0.2384205311536789, "loss_nce": 0.23817209899425507, "loss_mse": 0.002484358148649335, "lr": 1.1101951542613162e-05, "grad_norm": 0.11630015820264816, "wall_ms": 2012250}
{"step": 145600, "loss": 0.2503690719604492, "loss_nce": 0.2501164674758911, "loss_mse": 0.0025259172543883324, "lr": 1.1053554833688237e-05, "grad_norm": 0.131388857960701, "wall_ms": 2013505}
{"step": 145700, "loss": 0.2239970862865448, "loss_nce": 0.22374993562698364, "loss_mse": 0.0024715117178857327, "lr": 1.1006242604749301e-05, "grad_norm": 0.11782564967870712, "wall_ms": 2014773}
{"step": 145800, "loss": 0.2156093567609787, "loss_nce": 0.21535897254943848, "loss_mse": 0.002503858646377921, "lr": 1.0960015066126008e-05, "grad_norm": 0.12348375469446182, "wall_ms": 2016039}
{"step": 145900, "loss": 0.2465624064207077, "loss_nce": 0.24631617963314056, "loss_mse": 0.002462268341332674, "lr": 1.091487242332589e-05, "grad_norm": 0.12954631447792053, "wall_ms": 2017303}
{"step": 146000, "loss": 0.2327023148536682, "loss_nce": 0.23245425522327423, "loss_mse": 0.0024805464781820774, "lr": 1.0870814877033537e-05, "grad_norm": 0.11864183098077774, "wall_ms": 2018566}
{"step": 146100, "loss": 0.2242731750011444, "loss_nce": 0.22402286529541016, "loss_mse": 0.0025030835531651974, "lr": 1.0827842623109696e-05, "grad_norm": 0.1200995221734047, "wall_ms": 2019827}
{"step": 146200, "loss": 0.22511914372444153, "loss_nce": 0.22486907243728638, "loss_mse": 0.0025006665382534266, "lr": 1.0785955852590329e-05, "grad_norm": 0.1207338199019432, "wall_ms": 2021092}
{"step": 146300, "loss": 0.20672820508480072, "loss_nce": 0.20648011565208435, "loss_mse": 0.002480926690623164, "lr": 1.0745154751685839e-05, "grad_norm": 0.11875727027654648, "wall_ms": 2022363}
{"step": 146400, "loss": 0.24303878843784332, "loss_nce": 0.24278876185417175, "loss_mse": 0.002500328700989485, "lr": 1.0705439501780237e-05, "grad_norm": 0.12533506751060486, "wall_ms": 2023635}
{"step": 146500, "loss": 0.24535119533538818, "loss_nce": 0.2450999766588211, "loss_mse": 0.0025122559163719416, "lr": 1.066681027943024e-05, "grad_norm": 0.12788110971450806, "wall_ms": 2024911}
{"step": 146600, "loss": 0.21476982533931732, "loss_nce": 0.21452537178993225, "loss_mse": 0.0024445930030196905, "lr": 1.0629267256364616e-05, "grad_norm": 0.11499352008104324, "wall_ms": 2026189}
{"step": 146700, "loss": 0.22948743402957916, "loss_nce": 0.2292376011610031, "loss_mse": 0.002498346148058772, "lr": 1.059281059948335e-05, "grad_norm": 0.12508335709571838, "wall_ms": 2027456}
{"step": 146800, "loss": 0.2377084195613861, "loss_nce": 0.2374579757452011, "loss_mse": 0.002504480304196477, "lr": 1.0557440470856894e-05, "grad_norm": 0.1281760036945343, "wall_ms": 2028702}
{"step": 146900, "loss": 0.24010393023490906, "loss_nce": 0.2398526817560196, "loss_mse": 0.0025124563835561275, "lr": 1.0523157027725503e-05, "grad_norm": 0.13565482199192047, "wall_ms": 2029956}
{"step": 147000, "loss": 0.24382126331329346, "loss_nce": 0.24356985092163086, "loss_mse": 0.0025140719953924417, "lr": 1.0489960422498462e-05, "grad_norm": 0.12533855438232422, "wall_ms": 2031219}
{"step": 147100, "loss": 0.22467631101608276, "loss_nce": 0.22442564368247986, "loss_mse": 0.002506664954125881, "lr": 1.0457850802753495e-05, "grad_norm": 0.126227468252182, "wall_ms": 2032480}
{"step": 147200, "loss": 0.24448621273040771, "loss_nce": 0.24423277378082275, "loss_mse": 0.002534456318244338, "lr": 1.0426828311236027e-05, "grad_norm": 0.13216720521450043, "wall_ms": 2033738}
{"step": 147300, "loss": 0.22558313608169556, "loss_nce": 0.2253330945968628, "loss_mse": 0.002500434871762991, "lr": 1.0396893085858557e-05, "grad_norm": 0.11895480751991272, "wall_ms": 2034997}
{"step": 147400, "loss": 0.20862285792827606, "loss_nce": 0.20837348699569702, "loss_mse": 0.0024937642738223076, "lr": 1.0368045259700091e-05, "grad_norm": 0.116340272128582, "wall_ms": 2036259}
{"step": 147500, "loss": 0.1977211982011795, "loss_nce": 0.19747787714004517, "loss_mse": 0.002433160552754998, "lr": 1.0340284961005595e-05, "grad_norm": 0.11327332258224487, "wall_ms": 2037507}
{"step": 147600, "loss": 0.23517656326293945, "loss_nce": 0.23492717742919922, "loss_mse": 0.0024939225986599922, "lr": 1.0313612313185286e-05, "grad_norm": 0.13022306561470032, "wall_ms": 2038771}
{"step": 147700, "loss": 0.22446981072425842, "loss_nce": 0.22422054409980774, "loss_mse": 0.002492631087079644, "lr": 1.028802743481415e-05, "grad_norm": 0.13733325898647308, "wall_ms": 2040044}
{"step": 147800, "loss": 0.22619645297527313, "loss_nce": 0.2259499728679657, "loss_mse": 0.00246481504291296, "lr": 1.0263530439631468e-05, "grad_norm": 0.1179613545536995, "wall_ms": 2041319}
{"step": 147900, "loss": 0.21777579188346863, "loss_nce": 0.21752715110778809, "loss_mse": 0.002486392157152295, "lr": 1.024012143654026e-05, "grad_norm": 0.11184456199407578, "wall_ms": 2042585}
{"step": 148000, "loss": 0.21164648234844208, "loss_nce": 0.2113991677761078, "loss_mse": 0.002473160857334733, "lr": 1.0217800529606751e-05, "grad_norm": 0.11802244186401367, "wall_ms": 2043856}
{"step": 148100, "loss": 0.24248768389225006, "loss_nce": 0.2422335147857666, "loss_mse": 0.002541732508689165, "lr": 1.0196567818060029e-05, "grad_norm": 0.11787931621074677, "wall_ms": 2045126}
{"step": 148200, "loss": 0.23774594068527222, "loss_nce": 0.23749804496765137, "loss_mse": 0.002478939015418291, "lr": 1.0176423396291502e-05, "grad_norm": 0.12778763473033905, "wall_ms": 2046402}
{"step": 148300, "loss": 0.22988922894001007, "loss_nce": 0.22964197397232056, "loss_mse": 0.0024724809918552637, "lr": 1.0157367353854531e-05, "grad_norm": 0.13523074984550476, "wall_ms": 2047687}
{"step": 148400, "loss": 0.19948220252990723, "loss_nce": 0.19923600554466248, "loss_mse": 0.0024620164185762405, "lr": 1.0139399775464014e-05, "grad_norm": 0.1108655035495758, "wall_ms": 2048962}
{"step": 148500, "loss": 0.2326967418193817, "loss_nce": 0.23244546353816986, "loss_mse": 0.0025127993430942297, "lr": 1.0122520740995986e-05, "grad_norm": 0.12606281042099, "wall_ms": 2050227}
{"step": 148600, "loss": 0.23978565633296967, "loss_nce": 0.23953601717948914, "loss_mse": 0.002496371977031231, "lr": 1.0106730325487338e-05, "grad_norm": 0.13003847002983093, "wall_ms": 2051486}
{"step": 148700, "loss": 0.21825183928012848, "loss_nce": 0.21800190210342407, "loss_mse": 0.0024993596598505974, "lr": 1.0092028599135339e-05, "grad_norm": 0.11861986666917801, "wall_ms": 2052750}
{"step": 148800, "loss": 0.22292622923851013, "loss_nce": 0.22267812490463257, "loss_mse": 0.002481090137735009, "lr": 1.0078415627297567e-05, "grad_norm": 0.12601128220558167, "wall_ms": 2054024}
{"step": 148900, "loss": 0.19816111028194427, "loss_nce": 0.19791463017463684, "loss_mse": 0.002464830409735441, "lr": 1.0065891470491296e-05, "grad_norm": 0.11937720328569412, "wall_ms": 2055285}
{"step": 149000, "loss": 0.24295459687709808, "loss_nce": 0.24270185828208923, "loss_mse": 0.0025273922365158796, "lr": 1.0054456184393565e-05, "grad_norm": 0.128482386469841, "wall_ms": 2056538}
{"step": 149100, "loss": 0.21177460253238678, "loss_nce": 0.21152889728546143, "loss_mse": 0.002457062480971217, "lr": 1.0044109819840656e-05, "grad_norm": 0.12478934973478317, "wall_ms": 2057809}
{"step": 149200, "loss": 0.22744649648666382, "loss_nce": 0.22719857096672058, "loss_mse": 0.0024793108459562063, "lr": 1.003485242282801e-05, "grad_norm": 0.1287311464548111, "wall_ms": 2059077}
{"step": 149300, "loss": 0.2237858921289444, "loss_nce": 0.22353583574295044, "loss_mse": 0.0025005945935845375, "lr": 1.002668403450998e-05, "grad_norm": 0.12042998522520065, "wall_ms": 2060349}
{"step": 149400, "loss": 0.22772358357906342, "loss_nce": 0.227473184466362, "loss_mse": 0.0025039720349013805, "lr": 1.0019604691199735e-05, "grad_norm": 0.11767154186964035, "wall_ms": 2061621}
{"step": 149500, "loss": 0.23845434188842773, "loss_nce": 0.23820644617080688, "loss_mse": 0.0024789804592728615, "lr": 1.0013614424368916e-05, "grad_norm": 0.12415837496519089, "wall_ms": 2062888}
{"step": 149600, "loss": 0.2305118292570114, "loss_nce": 0.23026657104492188, "loss_mse": 0.002452544402331114, "lr": 1.0008713260647647e-05, "grad_norm": 0.12466101348400116, "wall_ms": 2064158}
{"step": 149700, "loss": 0.23565326631069183, "loss_nce": 0.23540055751800537, "loss_mse": 0.002527112141251564, "lr": 1.0004901221824372e-05, "grad_norm": 0.1246560588479042, "wall_ms": 2065424}
{"step": 149800, "loss": 0.22576335072517395, "loss_nce": 0.22551582753658295, "loss_mse": 0.0024751615710556507, "lr": 1.0002178324845794e-05, "grad_norm": 0.11215802282094955, "wall_ms": 2066699}
{"step": 149900, "loss": 0.24257613718509674, "loss_nce": 0.24232470989227295, "loss_mse": 0.002514241263270378, "lr": 1.0000544581816693e-05, "grad_norm": 0.12875066697597504, "wall_ms": 2067976}
{"step": 150000, "loss": 0.2361295372247696, "loss_nce": 0.23587825894355774, "loss_mse": 0.002512819366529584, "lr": 1e-05, "grad_norm": 0.13920067250728607, "wall_ms": 2069248}