{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.083056478405316,
  "eval_steps": 25,
  "global_step": 29,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10631229235880399,
      "grad_norm": 9.476922035217285,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 1.5353,
      "step": 1
    },
    {
      "epoch": 0.10631229235880399,
      "eval_loss": 1.7883131504058838,
      "eval_runtime": 5.1849,
      "eval_samples_per_second": 9.643,
      "eval_steps_per_second": 1.35,
      "step": 1
    },
    {
      "epoch": 0.21262458471760798,
      "grad_norm": 11.770483016967773,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 2.0967,
      "step": 2
    },
    {
      "epoch": 0.31893687707641194,
      "grad_norm": 8.020087242126465,
      "learning_rate": 8.999999999999999e-05,
      "loss": 1.3656,
      "step": 3
    },
    {
      "epoch": 0.42524916943521596,
      "grad_norm": 3.8597099781036377,
      "learning_rate": 0.00011999999999999999,
      "loss": 1.4046,
      "step": 4
    },
    {
      "epoch": 0.53156146179402,
      "grad_norm": 2.75777530670166,
      "learning_rate": 0.00015,
      "loss": 0.8511,
      "step": 5
    },
    {
      "epoch": 0.6378737541528239,
      "grad_norm": 2.38714599609375,
      "learning_rate": 0.00017999999999999998,
      "loss": 0.7158,
      "step": 6
    },
    {
      "epoch": 0.7441860465116279,
      "grad_norm": 2.6633334159851074,
      "learning_rate": 0.00020999999999999998,
      "loss": 0.4116,
      "step": 7
    },
    {
      "epoch": 0.8504983388704319,
      "grad_norm": 2.68704891204834,
      "learning_rate": 0.00023999999999999998,
      "loss": 0.1797,
      "step": 8
    },
    {
      "epoch": 0.9568106312292359,
      "grad_norm": 2.1327857971191406,
      "learning_rate": 0.00027,
      "loss": 0.0418,
      "step": 9
    },
    {
      "epoch": 1.06312292358804,
      "grad_norm": 6.611416339874268,
      "learning_rate": 0.0003,
      "loss": 0.0955,
      "step": 10
    },
    {
      "epoch": 1.169435215946844,
      "grad_norm": 0.27543535828590393,
      "learning_rate": 0.00029795419551040833,
      "loss": 0.0066,
      "step": 11
    },
    {
      "epoch": 1.2757475083056478,
      "grad_norm": 0.1719840168952942,
      "learning_rate": 0.00029187258625509513,
      "loss": 0.0057,
      "step": 12
    },
    {
      "epoch": 1.3820598006644518,
      "grad_norm": 0.05766455829143524,
      "learning_rate": 0.00028192106268097334,
      "loss": 0.0028,
      "step": 13
    },
    {
      "epoch": 1.4883720930232558,
      "grad_norm": 0.5688024163246155,
      "learning_rate": 0.00026837107640945905,
      "loss": 0.0038,
      "step": 14
    },
    {
      "epoch": 1.5946843853820598,
      "grad_norm": 0.28620290756225586,
      "learning_rate": 0.00025159223574386114,
      "loss": 0.004,
      "step": 15
    },
    {
      "epoch": 1.7009966777408638,
      "grad_norm": 0.12602539360523224,
      "learning_rate": 0.00023204222371836405,
      "loss": 0.0017,
      "step": 16
    },
    {
      "epoch": 1.8073089700996676,
      "grad_norm": 0.023306749761104584,
      "learning_rate": 0.0002102543136979454,
      "loss": 0.0011,
      "step": 17
    },
    {
      "epoch": 1.9136212624584719,
      "grad_norm": 0.024062521755695343,
      "learning_rate": 0.00018682282307111987,
      "loss": 0.001,
      "step": 18
    },
    {
      "epoch": 2.0199335548172757,
      "grad_norm": 0.03811626136302948,
      "learning_rate": 0.00016238690182084986,
      "loss": 0.0015,
      "step": 19
    },
    {
      "epoch": 2.12624584717608,
      "grad_norm": 0.008828516118228436,
      "learning_rate": 0.00013761309817915014,
      "loss": 0.0005,
      "step": 20
    },
    {
      "epoch": 2.2325581395348837,
      "grad_norm": 0.008889851160347462,
      "learning_rate": 0.00011317717692888012,
      "loss": 0.0005,
      "step": 21
    },
    {
      "epoch": 2.338870431893688,
      "grad_norm": 0.006506668403744698,
      "learning_rate": 8.97456863020546e-05,
      "loss": 0.0005,
      "step": 22
    },
    {
      "epoch": 2.4451827242524917,
      "grad_norm": 0.007389615289866924,
      "learning_rate": 6.795777628163599e-05,
      "loss": 0.0005,
      "step": 23
    },
    {
      "epoch": 2.5514950166112955,
      "grad_norm": 0.006299012806266546,
      "learning_rate": 4.840776425613886e-05,
      "loss": 0.0005,
      "step": 24
    },
    {
      "epoch": 2.6578073089700998,
      "grad_norm": 0.006252613849937916,
      "learning_rate": 3.162892359054098e-05,
      "loss": 0.0004,
      "step": 25
    },
    {
      "epoch": 2.6578073089700998,
      "eval_loss": 0.00036564457695931196,
      "eval_runtime": 4.1967,
      "eval_samples_per_second": 11.914,
      "eval_steps_per_second": 1.668,
      "step": 25
    },
    {
      "epoch": 2.7641196013289036,
      "grad_norm": 0.005357843823730946,
      "learning_rate": 1.8078937319026654e-05,
      "loss": 0.0004,
      "step": 26
    },
    {
      "epoch": 2.870431893687708,
      "grad_norm": 0.005207686685025692,
      "learning_rate": 8.127413744904804e-06,
      "loss": 0.0003,
      "step": 27
    },
    {
      "epoch": 2.9767441860465116,
      "grad_norm": 0.007186330854892731,
      "learning_rate": 2.0458044895916513e-06,
      "loss": 0.0004,
      "step": 28
    },
    {
      "epoch": 3.083056478405316,
      "grad_norm": 0.007002322934567928,
      "learning_rate": 0.0,
      "loss": 0.0005,
      "step": 29
    }
  ],
  "logging_steps": 1,
  "max_steps": 29,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.571959788149146e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}