{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9907578558225508, "eval_steps": 500, "global_step": 67, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014787430683918669, "grad_norm": 2.2905805110931396, "learning_rate": 4.997252228714279e-05, "loss": 1.5801, "num_input_tokens_seen": 1572864, "step": 1 }, { "epoch": 0.029574861367837338, "grad_norm": 1.8597325086593628, "learning_rate": 4.9890149550547454e-05, "loss": 1.3696, "num_input_tokens_seen": 3145728, "step": 2 }, { "epoch": 0.04436229205175601, "grad_norm": 1.7161543369293213, "learning_rate": 4.9753062863366276e-05, "loss": 1.2964, "num_input_tokens_seen": 4718592, "step": 3 }, { "epoch": 0.059149722735674676, "grad_norm": 1.556989073753357, "learning_rate": 4.95615635718894e-05, "loss": 1.2465, "num_input_tokens_seen": 6291456, "step": 4 }, { "epoch": 0.07393715341959335, "grad_norm": 1.2705891132354736, "learning_rate": 4.931607263312032e-05, "loss": 1.1284, "num_input_tokens_seen": 7864320, "step": 5 }, { "epoch": 0.08872458410351201, "grad_norm": 1.1708149909973145, "learning_rate": 4.9017129689421e-05, "loss": 1.0455, "num_input_tokens_seen": 9437184, "step": 6 }, { "epoch": 0.10351201478743069, "grad_norm": 1.0333069562911987, "learning_rate": 4.8665391882260856e-05, "loss": 0.985, "num_input_tokens_seen": 11010048, "step": 7 }, { "epoch": 0.11829944547134935, "grad_norm": 1.0009125471115112, "learning_rate": 4.8261632407677174e-05, "loss": 0.957, "num_input_tokens_seen": 12582912, "step": 8 }, { "epoch": 0.133086876155268, "grad_norm": 0.7598341107368469, "learning_rate": 4.780673881662242e-05, "loss": 0.9121, "num_input_tokens_seen": 14155776, "step": 9 }, { "epoch": 0.1478743068391867, "grad_norm": 0.366968035697937, "learning_rate": 4.730171106393466e-05, "loss": 0.8857, "num_input_tokens_seen": 15728640, "step": 10 }, { "epoch": 0.16266173752310537, "grad_norm": 0.2760382890701294, "learning_rate": 4.674765931021976e-05, "loss": 0.8871, "num_input_tokens_seen": 17301504, "step": 11 }, { "epoch": 0.17744916820702403, "grad_norm": 0.23743785917758942, "learning_rate": 4.614580148147744e-05, "loss": 0.8668, "num_input_tokens_seen": 18874368, "step": 12 }, { "epoch": 0.1922365988909427, "grad_norm": 0.21216906607151031, "learning_rate": 4.5497460591835615e-05, "loss": 0.8264, "num_input_tokens_seen": 20447232, "step": 13 }, { "epoch": 0.20702402957486138, "grad_norm": 0.202525332570076, "learning_rate": 4.480406183527823e-05, "loss": 0.8316, "num_input_tokens_seen": 22020096, "step": 14 }, { "epoch": 0.22181146025878004, "grad_norm": 0.1785624921321869, "learning_rate": 4.406712945275955e-05, "loss": 0.8214, "num_input_tokens_seen": 23592960, "step": 15 }, { "epoch": 0.2365988909426987, "grad_norm": 0.17254720628261566, "learning_rate": 4.328828338159173e-05, "loss": 0.8074, "num_input_tokens_seen": 25165824, "step": 16 }, { "epoch": 0.2513863216266174, "grad_norm": 0.15905120968818665, "learning_rate": 4.2469235694471043e-05, "loss": 0.7947, "num_input_tokens_seen": 26738688, "step": 17 }, { "epoch": 0.266173752310536, "grad_norm": 0.13924936950206757, "learning_rate": 4.161178683597054e-05, "loss": 0.7868, "num_input_tokens_seen": 28311552, "step": 18 }, { "epoch": 0.2809611829944547, "grad_norm": 0.12500160932540894, "learning_rate": 4.071782166477213e-05, "loss": 0.7657, "num_input_tokens_seen": 29884416, "step": 19 }, { "epoch": 0.2957486136783734, "grad_norm": 0.11662258207798004, "learning_rate": 3.978930531033807e-05, "loss": 0.7821, "num_input_tokens_seen": 31457280, "step": 20 }, { "epoch": 0.31053604436229204, "grad_norm": 0.10671637207269669, "learning_rate": 3.882827885312999e-05, "loss": 0.764, "num_input_tokens_seen": 33030144, "step": 21 }, { "epoch": 0.32532347504621073, "grad_norm": 0.10863461345434189, "learning_rate": 3.783685483787105e-05, "loss": 0.7781, "num_input_tokens_seen": 34603008, "step": 22 }, { "epoch": 0.34011090573012936, "grad_norm": 0.09509966522455215, "learning_rate": 3.681721262971413e-05, "loss": 0.7663, "num_input_tokens_seen": 36175872, "step": 23 }, { "epoch": 0.35489833641404805, "grad_norm": 0.08742663264274597, "learning_rate": 3.5771593623524265e-05, "loss": 0.7303, "num_input_tokens_seen": 37748736, "step": 24 }, { "epoch": 0.36968576709796674, "grad_norm": 0.09325291961431503, "learning_rate": 3.4702296316806244e-05, "loss": 0.7626, "num_input_tokens_seen": 39321600, "step": 25 }, { "epoch": 0.3844731977818854, "grad_norm": 0.08703341335058212, "learning_rate": 3.361167125710832e-05, "loss": 0.7709, "num_input_tokens_seen": 40894464, "step": 26 }, { "epoch": 0.39926062846580407, "grad_norm": 0.09138187021017075, "learning_rate": 3.2502115875008524e-05, "loss": 0.7638, "num_input_tokens_seen": 42467328, "step": 27 }, { "epoch": 0.41404805914972276, "grad_norm": 0.08183719217777252, "learning_rate": 3.1376069214041913e-05, "loss": 0.7561, "num_input_tokens_seen": 44040192, "step": 28 }, { "epoch": 0.4288354898336414, "grad_norm": 0.08267659693956375, "learning_rate": 3.0236006569153617e-05, "loss": 0.7372, "num_input_tokens_seen": 45613056, "step": 29 }, { "epoch": 0.4436229205175601, "grad_norm": 0.07969928532838821, "learning_rate": 2.9084434045463255e-05, "loss": 0.7281, "num_input_tokens_seen": 47185920, "step": 30 }, { "epoch": 0.4584103512014787, "grad_norm": 0.07706066220998764, "learning_rate": 2.792388304930207e-05, "loss": 0.7601, "num_input_tokens_seen": 48758784, "step": 31 }, { "epoch": 0.4731977818853974, "grad_norm": 0.07695123553276062, "learning_rate": 2.6756904723632324e-05, "loss": 0.7296, "num_input_tokens_seen": 50331648, "step": 32 }, { "epoch": 0.4879852125693161, "grad_norm": 0.07787525653839111, "learning_rate": 2.5586064340081516e-05, "loss": 0.741, "num_input_tokens_seen": 51904512, "step": 33 }, { "epoch": 0.5027726432532348, "grad_norm": 0.08225582540035248, "learning_rate": 2.441393565991849e-05, "loss": 0.7251, "num_input_tokens_seen": 53477376, "step": 34 }, { "epoch": 0.5175600739371534, "grad_norm": 0.0731961578130722, "learning_rate": 2.3243095276367685e-05, "loss": 0.7385, "num_input_tokens_seen": 55050240, "step": 35 }, { "epoch": 0.532347504621072, "grad_norm": 0.08208758383989334, "learning_rate": 2.207611695069794e-05, "loss": 0.746, "num_input_tokens_seen": 56623104, "step": 36 }, { "epoch": 0.5471349353049908, "grad_norm": 0.07385499030351639, "learning_rate": 2.0915565954536744e-05, "loss": 0.7315, "num_input_tokens_seen": 58195968, "step": 37 }, { "epoch": 0.5619223659889094, "grad_norm": 0.06937970221042633, "learning_rate": 1.9763993430846395e-05, "loss": 0.7267, "num_input_tokens_seen": 59768832, "step": 38 }, { "epoch": 0.5767097966728281, "grad_norm": 0.0702456384897232, "learning_rate": 1.8623930785958092e-05, "loss": 0.7443, "num_input_tokens_seen": 61341696, "step": 39 }, { "epoch": 0.5914972273567468, "grad_norm": 0.0659838542342186, "learning_rate": 1.749788412499149e-05, "loss": 0.7163, "num_input_tokens_seen": 62914560, "step": 40 }, { "epoch": 0.6062846580406654, "grad_norm": 0.07357968389987946, "learning_rate": 1.638832874289168e-05, "loss": 0.73, "num_input_tokens_seen": 64487424, "step": 41 }, { "epoch": 0.6210720887245841, "grad_norm": 0.0689447820186615, "learning_rate": 1.5297703683193752e-05, "loss": 0.723, "num_input_tokens_seen": 66060288, "step": 42 }, { "epoch": 0.6358595194085028, "grad_norm": 0.0671798512339592, "learning_rate": 1.4228406376475742e-05, "loss": 0.7176, "num_input_tokens_seen": 67633152, "step": 43 }, { "epoch": 0.6506469500924215, "grad_norm": 0.06556376069784164, "learning_rate": 1.3182787370285865e-05, "loss": 0.7066, "num_input_tokens_seen": 69206016, "step": 44 }, { "epoch": 0.6654343807763401, "grad_norm": 0.07168299704790115, "learning_rate": 1.2163145162128947e-05, "loss": 0.7244, "num_input_tokens_seen": 70778880, "step": 45 }, { "epoch": 0.6802218114602587, "grad_norm": 0.06598961353302002, "learning_rate": 1.1171721146870015e-05, "loss": 0.7349, "num_input_tokens_seen": 72351744, "step": 46 }, { "epoch": 0.6950092421441775, "grad_norm": 0.06602618843317032, "learning_rate": 1.021069468966194e-05, "loss": 0.7487, "num_input_tokens_seen": 73924608, "step": 47 }, { "epoch": 0.7097966728280961, "grad_norm": 0.0713997408747673, "learning_rate": 9.282178335227884e-06, "loss": 0.7381, "num_input_tokens_seen": 75497472, "step": 48 }, { "epoch": 0.7245841035120147, "grad_norm": 0.06555724889039993, "learning_rate": 8.38821316402946e-06, "loss": 0.7262, "num_input_tokens_seen": 77070336, "step": 49 }, { "epoch": 0.7393715341959335, "grad_norm": 0.06813663244247437, "learning_rate": 7.530764305528959e-06, "loss": 0.7473, "num_input_tokens_seen": 78643200, "step": 50 }, { "epoch": 0.7541589648798521, "grad_norm": 0.06930514425039291, "learning_rate": 6.711716618408281e-06, "loss": 0.6998, "num_input_tokens_seen": 80216064, "step": 51 }, { "epoch": 0.7689463955637708, "grad_norm": 0.06492163240909576, "learning_rate": 5.932870547240454e-06, "loss": 0.7218, "num_input_tokens_seen": 81788928, "step": 52 }, { "epoch": 0.7837338262476895, "grad_norm": 0.07155918329954147, "learning_rate": 5.1959381647217666e-06, "loss": 0.7314, "num_input_tokens_seen": 83361792, "step": 53 }, { "epoch": 0.7985212569316081, "grad_norm": 0.06532897800207138, "learning_rate": 4.502539408164386e-06, "loss": 0.7028, "num_input_tokens_seen": 84934656, "step": 54 }, { "epoch": 0.8133086876155268, "grad_norm": 0.06727246940135956, "learning_rate": 3.8541985185225645e-06, "loss": 0.7084, "num_input_tokens_seen": 86507520, "step": 55 }, { "epoch": 0.8280961182994455, "grad_norm": 0.06698304414749146, "learning_rate": 3.252340689780245e-06, "loss": 0.7223, "num_input_tokens_seen": 88080384, "step": 56 }, { "epoch": 0.8428835489833642, "grad_norm": 0.06450291723012924, "learning_rate": 2.6982889360653377e-06, "loss": 0.7195, "num_input_tokens_seen": 89653248, "step": 57 }, { "epoch": 0.8576709796672828, "grad_norm": 0.06992805004119873, "learning_rate": 2.1932611833775846e-06, "loss": 0.7431, "num_input_tokens_seen": 91226112, "step": 58 }, { "epoch": 0.8724584103512015, "grad_norm": 0.06958083808422089, "learning_rate": 1.738367592322837e-06, "loss": 0.732, "num_input_tokens_seen": 92798976, "step": 59 }, { "epoch": 0.8872458410351202, "grad_norm": 0.0694640502333641, "learning_rate": 1.3346081177391472e-06, "loss": 0.7302, "num_input_tokens_seen": 94371840, "step": 60 }, { "epoch": 0.9020332717190388, "grad_norm": 0.07005713880062103, "learning_rate": 9.828703105789983e-07, "loss": 0.7197, "num_input_tokens_seen": 95944704, "step": 61 }, { "epoch": 0.9168207024029574, "grad_norm": 0.07030840963125229, "learning_rate": 6.839273668796747e-07, "loss": 0.7203, "num_input_tokens_seen": 97517568, "step": 62 }, { "epoch": 0.9316081330868762, "grad_norm": 0.0708225816488266, "learning_rate": 4.3843642811059737e-07, "loss": 0.7474, "num_input_tokens_seen": 99090432, "step": 63 }, { "epoch": 0.9463955637707948, "grad_norm": 0.0676749050617218, "learning_rate": 2.4693713663372644e-07, "loss": 0.7403, "num_input_tokens_seen": 100663296, "step": 64 }, { "epoch": 0.9611829944547134, "grad_norm": 0.06782912462949753, "learning_rate": 1.0985044945254764e-07, "loss": 0.7327, "num_input_tokens_seen": 102236160, "step": 65 }, { "epoch": 0.9759704251386322, "grad_norm": 0.06357243657112122, "learning_rate": 2.7477712857215677e-08, "loss": 0.725, "num_input_tokens_seen": 103809024, "step": 66 }, { "epoch": 0.9907578558225508, "grad_norm": 0.07199209183454514, "learning_rate": 0.0, "loss": 0.7209, "num_input_tokens_seen": 105381888, "step": 67 }, { "epoch": 0.9907578558225508, "num_input_tokens_seen": 105381888, "step": 67, "total_flos": 4.104162098269913e+18, "train_loss": 0.8075656152483243, "train_runtime": 10309.5741, "train_samples_per_second": 2.518, "train_steps_per_second": 0.006 } ], "logging_steps": 1, "max_steps": 67, "num_input_tokens_seen": 105381888, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.104162098269913e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }