{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.983957219251337,
  "eval_steps": 500,
  "global_step": 46,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0213903743315508,
      "grad_norm": 13.02454662322998,
      "learning_rate": 0.0,
      "loss": 1.6624,
      "step": 1
    },
    {
      "epoch": 0.0427807486631016,
      "grad_norm": 12.534032821655273,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.6374,
      "step": 2
    },
    {
      "epoch": 0.06417112299465241,
      "grad_norm": 12.500082015991211,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.5855,
      "step": 3
    },
    {
      "epoch": 0.0855614973262032,
      "grad_norm": 19.927814483642578,
      "learning_rate": 6e-06,
      "loss": 1.585,
      "step": 4
    },
    {
      "epoch": 0.10695187165775401,
      "grad_norm": 8.535591125488281,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.4463,
      "step": 5
    },
    {
      "epoch": 0.12834224598930483,
      "grad_norm": 8.471524238586426,
      "learning_rate": 1e-05,
      "loss": 1.3428,
      "step": 6
    },
    {
      "epoch": 0.1497326203208556,
      "grad_norm": 6.918763637542725,
      "learning_rate": 9.756097560975611e-06,
      "loss": 1.3263,
      "step": 7
    },
    {
      "epoch": 0.1711229946524064,
      "grad_norm": 10.45449161529541,
      "learning_rate": 9.51219512195122e-06,
      "loss": 1.1986,
      "step": 8
    },
    {
      "epoch": 0.1925133689839572,
      "grad_norm": 7.750345706939697,
      "learning_rate": 9.268292682926831e-06,
      "loss": 1.1741,
      "step": 9
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 18.263578414916992,
      "learning_rate": 9.02439024390244e-06,
      "loss": 1.1137,
      "step": 10
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 10.370940208435059,
      "learning_rate": 8.78048780487805e-06,
      "loss": 1.0881,
      "step": 11
    },
    {
      "epoch": 0.25668449197860965,
      "grad_norm": 6.241279125213623,
      "learning_rate": 8.536585365853658e-06,
      "loss": 1.0457,
      "step": 12
    },
    {
      "epoch": 0.27807486631016043,
      "grad_norm": 6.566608905792236,
      "learning_rate": 8.292682926829268e-06,
      "loss": 1.0565,
      "step": 13
    },
    {
      "epoch": 0.2994652406417112,
      "grad_norm": 50.85325241088867,
      "learning_rate": 8.048780487804879e-06,
      "loss": 0.9655,
      "step": 14
    },
    {
      "epoch": 0.32085561497326204,
      "grad_norm": 4.764376163482666,
      "learning_rate": 7.804878048780489e-06,
      "loss": 1.07,
      "step": 15
    },
    {
      "epoch": 0.3422459893048128,
      "grad_norm": 8.540885925292969,
      "learning_rate": 7.560975609756098e-06,
      "loss": 0.9066,
      "step": 16
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 7.087356090545654,
      "learning_rate": 7.317073170731707e-06,
      "loss": 1.0001,
      "step": 17
    },
    {
      "epoch": 0.3850267379679144,
      "grad_norm": 7.610484600067139,
      "learning_rate": 7.0731707317073175e-06,
      "loss": 0.9206,
      "step": 18
    },
    {
      "epoch": 0.40641711229946526,
      "grad_norm": 19.34569549560547,
      "learning_rate": 6.829268292682928e-06,
      "loss": 0.9691,
      "step": 19
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 7.0099053382873535,
      "learning_rate": 6.585365853658538e-06,
      "loss": 0.9798,
      "step": 20
    },
    {
      "epoch": 0.44919786096256686,
      "grad_norm": 5.669744491577148,
      "learning_rate": 6.341463414634147e-06,
      "loss": 0.9754,
      "step": 21
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 4.6388630867004395,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 0.8885,
      "step": 22
    },
    {
      "epoch": 0.4919786096256685,
      "grad_norm": 8.106114387512207,
      "learning_rate": 5.853658536585366e-06,
      "loss": 0.9252,
      "step": 23
    },
    {
      "epoch": 0.5133689839572193,
      "grad_norm": 7.336320400238037,
      "learning_rate": 5.609756097560977e-06,
      "loss": 0.916,
      "step": 24
    },
    {
      "epoch": 0.5347593582887701,
      "grad_norm": 18.173933029174805,
      "learning_rate": 5.365853658536586e-06,
      "loss": 0.8807,
      "step": 25
    },
    {
      "epoch": 0.5561497326203209,
      "grad_norm": 6.46876335144043,
      "learning_rate": 5.121951219512195e-06,
      "loss": 0.8674,
      "step": 26
    },
    {
      "epoch": 0.5775401069518716,
      "grad_norm": 7.97756290435791,
      "learning_rate": 4.8780487804878055e-06,
      "loss": 0.8969,
      "step": 27
    },
    {
      "epoch": 0.5989304812834224,
      "grad_norm": 18.289745330810547,
      "learning_rate": 4.634146341463416e-06,
      "loss": 0.9367,
      "step": 28
    },
    {
      "epoch": 0.6203208556149733,
      "grad_norm": 6.36326789855957,
      "learning_rate": 4.390243902439025e-06,
      "loss": 0.849,
      "step": 29
    },
    {
      "epoch": 0.6417112299465241,
      "grad_norm": 6.118152618408203,
      "learning_rate": 4.146341463414634e-06,
      "loss": 0.8319,
      "step": 30
    },
    {
      "epoch": 0.6631016042780749,
      "grad_norm": 9.238419532775879,
      "learning_rate": 3.902439024390244e-06,
      "loss": 0.9253,
      "step": 31
    },
    {
      "epoch": 0.6844919786096256,
      "grad_norm": 5.425105571746826,
      "learning_rate": 3.6585365853658537e-06,
      "loss": 0.8489,
      "step": 32
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 6.4847731590271,
      "learning_rate": 3.414634146341464e-06,
      "loss": 0.8567,
      "step": 33
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 6.540438175201416,
      "learning_rate": 3.1707317073170736e-06,
      "loss": 0.9211,
      "step": 34
    },
    {
      "epoch": 0.7486631016042781,
      "grad_norm": 10.181364059448242,
      "learning_rate": 2.926829268292683e-06,
      "loss": 0.8605,
      "step": 35
    },
    {
      "epoch": 0.7700534759358288,
      "grad_norm": 5.964487075805664,
      "learning_rate": 2.682926829268293e-06,
      "loss": 0.8089,
      "step": 36
    },
    {
      "epoch": 0.7914438502673797,
      "grad_norm": 9.849525451660156,
      "learning_rate": 2.4390243902439027e-06,
      "loss": 0.8418,
      "step": 37
    },
    {
      "epoch": 0.8128342245989305,
      "grad_norm": 6.233251571655273,
      "learning_rate": 2.1951219512195125e-06,
      "loss": 0.8011,
      "step": 38
    },
    {
      "epoch": 0.8342245989304813,
      "grad_norm": 4.578292369842529,
      "learning_rate": 1.951219512195122e-06,
      "loss": 0.9271,
      "step": 39
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 9.469111442565918,
      "learning_rate": 1.707317073170732e-06,
      "loss": 0.9077,
      "step": 40
    },
    {
      "epoch": 0.8770053475935828,
      "grad_norm": 25.019927978515625,
      "learning_rate": 1.4634146341463414e-06,
      "loss": 0.8391,
      "step": 41
    },
    {
      "epoch": 0.8983957219251337,
      "grad_norm": 4.891110897064209,
      "learning_rate": 1.2195121951219514e-06,
      "loss": 0.8804,
      "step": 42
    },
    {
      "epoch": 0.9197860962566845,
      "grad_norm": 5.689752578735352,
      "learning_rate": 9.75609756097561e-07,
      "loss": 0.8833,
      "step": 43
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 5.5397772789001465,
      "learning_rate": 7.317073170731707e-07,
      "loss": 0.8524,
      "step": 44
    },
    {
      "epoch": 0.9625668449197861,
      "grad_norm": 9.089788436889648,
      "learning_rate": 4.878048780487805e-07,
      "loss": 0.8907,
      "step": 45
    },
    {
      "epoch": 0.983957219251337,
      "grad_norm": 5.81181526184082,
      "learning_rate": 2.439024390243903e-07,
      "loss": 0.8489,
      "step": 46
    },
    {
      "epoch": 0.983957219251337,
      "step": 46,
      "total_flos": 199575490265088.0,
      "train_loss": 1.020340691442075,
      "train_runtime": 3230.2657,
      "train_samples_per_second": 0.463,
      "train_steps_per_second": 0.014
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 46,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 199575490265088.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}