Text Generation
Transformers
Safetensors
llama
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use SimpleBerry/LLaMA-O1-Supervised-1129 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use SimpleBerry/LLaMA-O1-Supervised-1129 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="SimpleBerry/LLaMA-O1-Supervised-1129")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("SimpleBerry/LLaMA-O1-Supervised-1129")
model = AutoModelForCausalLM.from_pretrained("SimpleBerry/LLaMA-O1-Supervised-1129")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use SimpleBerry/LLaMA-O1-Supervised-1129 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "SimpleBerry/LLaMA-O1-Supervised-1129"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SimpleBerry/LLaMA-O1-Supervised-1129",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/SimpleBerry/LLaMA-O1-Supervised-1129
- SGLang
How to use SimpleBerry/LLaMA-O1-Supervised-1129 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "SimpleBerry/LLaMA-O1-Supervised-1129" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SimpleBerry/LLaMA-O1-Supervised-1129",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "SimpleBerry/LLaMA-O1-Supervised-1129" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SimpleBerry/LLaMA-O1-Supervised-1129",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use SimpleBerry/LLaMA-O1-Supervised-1129 with Docker Model Runner:
docker model run hf.co/SimpleBerry/LLaMA-O1-Supervised-1129
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9985559566787003, | |
| "eval_steps": 500, | |
| "global_step": 1730, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0011552346570397113, | |
| "grad_norm": 0.717101514339447, | |
| "learning_rate": 0.0, | |
| "loss": 1.5901, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0023104693140794225, | |
| "grad_norm": 0.7195026874542236, | |
| "learning_rate": 5.017166594399687e-07, | |
| "loss": 1.583, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0034657039711191336, | |
| "grad_norm": 0.7274531722068787, | |
| "learning_rate": 7.952020911994375e-07, | |
| "loss": 1.6153, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.004620938628158845, | |
| "grad_norm": 0.7189211249351501, | |
| "learning_rate": 1.0034333188799374e-06, | |
| "loss": 1.5939, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005776173285198556, | |
| "grad_norm": 0.7074140906333923, | |
| "learning_rate": 1.164950007226698e-06, | |
| "loss": 1.6126, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006931407942238267, | |
| "grad_norm": 0.7060695886611938, | |
| "learning_rate": 1.2969187506394062e-06, | |
| "loss": 1.597, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.008086642599277978, | |
| "grad_norm": 0.693950891494751, | |
| "learning_rate": 1.4084967333570947e-06, | |
| "loss": 1.584, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.00924187725631769, | |
| "grad_norm": 0.6204413771629333, | |
| "learning_rate": 1.5051499783199062e-06, | |
| "loss": 1.5333, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0103971119133574, | |
| "grad_norm": 0.5834112763404846, | |
| "learning_rate": 1.590404182398875e-06, | |
| "loss": 1.5173, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.011552346570397111, | |
| "grad_norm": 0.5479596257209778, | |
| "learning_rate": 1.666666666666667e-06, | |
| "loss": 1.4693, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012707581227436824, | |
| "grad_norm": 0.3792664110660553, | |
| "learning_rate": 1.7356544752637086e-06, | |
| "loss": 1.3613, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.013862815884476534, | |
| "grad_norm": 0.346934974193573, | |
| "learning_rate": 1.798635410079375e-06, | |
| "loss": 1.3096, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.015018050541516245, | |
| "grad_norm": 0.30829983949661255, | |
| "learning_rate": 1.8565722538447281e-06, | |
| "loss": 1.3096, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.016173285198555955, | |
| "grad_norm": 0.29539182782173157, | |
| "learning_rate": 1.9102133927970633e-06, | |
| "loss": 1.3067, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.017328519855595668, | |
| "grad_norm": 0.17869696021080017, | |
| "learning_rate": 1.960152098426136e-06, | |
| "loss": 1.2191, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01848375451263538, | |
| "grad_norm": 0.3124949634075165, | |
| "learning_rate": 2.0068666377598747e-06, | |
| "loss": 1.2029, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01963898916967509, | |
| "grad_norm": 0.33391040563583374, | |
| "learning_rate": 2.0507482022971233e-06, | |
| "loss": 1.2076, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0207942238267148, | |
| "grad_norm": 0.3112436830997467, | |
| "learning_rate": 2.0921208418388438e-06, | |
| "loss": 1.1862, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.021949458483754514, | |
| "grad_norm": 0.29002705216407776, | |
| "learning_rate": 2.1312560015880486e-06, | |
| "loss": 1.1844, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.023104693140794223, | |
| "grad_norm": 0.2618213891983032, | |
| "learning_rate": 2.1683833261066357e-06, | |
| "loss": 1.1763, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.024259927797833935, | |
| "grad_norm": 0.20633897185325623, | |
| "learning_rate": 2.2036988245565326e-06, | |
| "loss": 1.1683, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.025415162454873647, | |
| "grad_norm": 0.17382092773914337, | |
| "learning_rate": 2.2373711347036773e-06, | |
| "loss": 1.1119, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.026570397111913356, | |
| "grad_norm": 0.12656044960021973, | |
| "learning_rate": 2.269546393362655e-06, | |
| "loss": 1.1001, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.02772563176895307, | |
| "grad_norm": 0.1064457818865776, | |
| "learning_rate": 2.3003520695193436e-06, | |
| "loss": 1.1154, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.02888086642599278, | |
| "grad_norm": 0.09630957245826721, | |
| "learning_rate": 2.329900014453396e-06, | |
| "loss": 1.0793, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03003610108303249, | |
| "grad_norm": 0.10916559398174286, | |
| "learning_rate": 2.358288913284697e-06, | |
| "loss": 1.0672, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.031191335740072202, | |
| "grad_norm": 0.11219058930873871, | |
| "learning_rate": 2.385606273598312e-06, | |
| "loss": 1.0833, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.03234657039711191, | |
| "grad_norm": 0.11489821970462799, | |
| "learning_rate": 2.4119300522370323e-06, | |
| "loss": 1.0795, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03350180505415162, | |
| "grad_norm": 0.11093555390834808, | |
| "learning_rate": 2.4373299964982607e-06, | |
| "loss": 1.0599, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.034657039711191336, | |
| "grad_norm": 0.10726247727870941, | |
| "learning_rate": 2.4618687578661045e-06, | |
| "loss": 1.066, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03581227436823105, | |
| "grad_norm": 0.0960482507944107, | |
| "learning_rate": 2.4856028230571215e-06, | |
| "loss": 1.0332, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.03696750902527076, | |
| "grad_norm": 0.09282615780830383, | |
| "learning_rate": 2.5085832971998437e-06, | |
| "loss": 1.0566, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03812274368231047, | |
| "grad_norm": 0.0818951278924942, | |
| "learning_rate": 2.530856566463146e-06, | |
| "loss": 1.0351, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03927797833935018, | |
| "grad_norm": 0.07113130390644073, | |
| "learning_rate": 2.5524648617370923e-06, | |
| "loss": 1.0164, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04043321299638989, | |
| "grad_norm": 0.062365710735321045, | |
| "learning_rate": 2.5734467405837933e-06, | |
| "loss": 1.0125, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0415884476534296, | |
| "grad_norm": 0.057508960366249084, | |
| "learning_rate": 2.5938375012788124e-06, | |
| "loss": 1.0392, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.042743682310469315, | |
| "grad_norm": 0.0539795346558094, | |
| "learning_rate": 2.6136695401116585e-06, | |
| "loss": 1.0244, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.04389891696750903, | |
| "grad_norm": 0.053800784051418304, | |
| "learning_rate": 2.632972661028017e-06, | |
| "loss": 0.9972, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04505415162454874, | |
| "grad_norm": 0.05475891754031181, | |
| "learning_rate": 2.6517743450441657e-06, | |
| "loss": 1.0142, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.046209386281588445, | |
| "grad_norm": 0.0571669340133667, | |
| "learning_rate": 2.6700999855466042e-06, | |
| "loss": 0.9995, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04736462093862816, | |
| "grad_norm": 0.05862262472510338, | |
| "learning_rate": 2.687973094532893e-06, | |
| "loss": 0.998, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.04851985559566787, | |
| "grad_norm": 0.0590708963572979, | |
| "learning_rate": 2.705415483996501e-06, | |
| "loss": 0.9798, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04967509025270758, | |
| "grad_norm": 0.056892745196819305, | |
| "learning_rate": 2.722447425965978e-06, | |
| "loss": 0.9879, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.050830324909747295, | |
| "grad_norm": 0.0541134737432003, | |
| "learning_rate": 2.739087794143646e-06, | |
| "loss": 0.9722, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05198555956678701, | |
| "grad_norm": 0.048609230667352676, | |
| "learning_rate": 2.7553541896255733e-06, | |
| "loss": 0.9491, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05314079422382671, | |
| "grad_norm": 0.04965611547231674, | |
| "learning_rate": 2.771263052802624e-06, | |
| "loss": 1.0037, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.054296028880866425, | |
| "grad_norm": 0.043069027364254, | |
| "learning_rate": 2.7868297632261957e-06, | |
| "loss": 0.971, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.05545126353790614, | |
| "grad_norm": 0.03820377215743065, | |
| "learning_rate": 2.8020687289593126e-06, | |
| "loss": 1.0084, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05660649819494585, | |
| "grad_norm": 0.03588235378265381, | |
| "learning_rate": 2.8169934667141895e-06, | |
| "loss": 0.9507, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.05776173285198556, | |
| "grad_norm": 0.03598296642303467, | |
| "learning_rate": 2.8316166738933647e-06, | |
| "loss": 0.973, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.058916967509025274, | |
| "grad_norm": 0.0384756475687027, | |
| "learning_rate": 2.845950293496561e-06, | |
| "loss": 0.9758, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.06007220216606498, | |
| "grad_norm": 0.03798473998904228, | |
| "learning_rate": 2.8600055727246655e-06, | |
| "loss": 0.9991, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06122743682310469, | |
| "grad_norm": 0.039415981620550156, | |
| "learning_rate": 2.8737931160013154e-06, | |
| "loss": 0.9661, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.062382671480144404, | |
| "grad_norm": 0.04056290537118912, | |
| "learning_rate": 2.887322933038281e-06, | |
| "loss": 0.9773, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06353790613718412, | |
| "grad_norm": 0.03856893256306648, | |
| "learning_rate": 2.900604482490407e-06, | |
| "loss": 0.9644, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06469314079422382, | |
| "grad_norm": 0.03817951679229736, | |
| "learning_rate": 2.9136467116770013e-06, | |
| "loss": 0.9701, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06584837545126354, | |
| "grad_norm": 0.03559865057468414, | |
| "learning_rate": 2.926458092787486e-06, | |
| "loss": 0.9314, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.06700361010830325, | |
| "grad_norm": 0.03424916788935661, | |
| "learning_rate": 2.9390466559382293e-06, | |
| "loss": 0.9489, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06815884476534297, | |
| "grad_norm": 0.03320642188191414, | |
| "learning_rate": 2.951420019403574e-06, | |
| "loss": 0.9697, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06931407942238267, | |
| "grad_norm": 0.031773000955581665, | |
| "learning_rate": 2.963585417306073e-06, | |
| "loss": 0.923, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07046931407942238, | |
| "grad_norm": 0.0303028617054224, | |
| "learning_rate": 2.9755497250179457e-06, | |
| "loss": 0.9448, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.0716245487364621, | |
| "grad_norm": 0.029582129791378975, | |
| "learning_rate": 2.98731948249709e-06, | |
| "loss": 0.9521, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0727797833935018, | |
| "grad_norm": 0.029476916417479515, | |
| "learning_rate": 2.9989009157559695e-06, | |
| "loss": 0.9429, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.07393501805054152, | |
| "grad_norm": 0.02916543185710907, | |
| "learning_rate": 3.0102999566398123e-06, | |
| "loss": 0.9641, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07509025270758123, | |
| "grad_norm": 0.029549594968557358, | |
| "learning_rate": 3.021522261071426e-06, | |
| "loss": 0.9555, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07624548736462095, | |
| "grad_norm": 0.03075719065964222, | |
| "learning_rate": 3.0325732259031143e-06, | |
| "loss": 0.9531, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07740072202166065, | |
| "grad_norm": 0.0289381705224514, | |
| "learning_rate": 3.0434580045013773e-06, | |
| "loss": 0.9358, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.07855595667870036, | |
| "grad_norm": 0.03085014969110489, | |
| "learning_rate": 3.054181521177061e-06, | |
| "loss": 0.9496, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07971119133574008, | |
| "grad_norm": 0.02944289892911911, | |
| "learning_rate": 3.064748484562093e-06, | |
| "loss": 0.931, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.08086642599277978, | |
| "grad_norm": 0.02958507277071476, | |
| "learning_rate": 3.075163400023762e-06, | |
| "loss": 0.9674, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0820216606498195, | |
| "grad_norm": 0.029631255194544792, | |
| "learning_rate": 3.085430581198459e-06, | |
| "loss": 0.9504, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0831768953068592, | |
| "grad_norm": 0.028514275327324867, | |
| "learning_rate": 3.095554160718781e-06, | |
| "loss": 0.9513, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.08433212996389891, | |
| "grad_norm": 0.02768518030643463, | |
| "learning_rate": 3.1055381002007602e-06, | |
| "loss": 0.9152, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.08548736462093863, | |
| "grad_norm": 0.028020448982715607, | |
| "learning_rate": 3.1153861995516275e-06, | |
| "loss": 0.9247, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08664259927797834, | |
| "grad_norm": 0.02758488804101944, | |
| "learning_rate": 3.1251021056528336e-06, | |
| "loss": 0.9285, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08779783393501805, | |
| "grad_norm": 0.02713642828166485, | |
| "learning_rate": 3.1346893204679857e-06, | |
| "loss": 0.928, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08895306859205776, | |
| "grad_norm": 0.026601964607834816, | |
| "learning_rate": 3.1441512086208035e-06, | |
| "loss": 0.9135, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.09010830324909748, | |
| "grad_norm": 0.02707557938992977, | |
| "learning_rate": 3.1534910044841343e-06, | |
| "loss": 0.9306, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09126353790613718, | |
| "grad_norm": 0.027205491438508034, | |
| "learning_rate": 3.1627118188174026e-06, | |
| "loss": 0.9475, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.09241877256317689, | |
| "grad_norm": 0.02767670899629593, | |
| "learning_rate": 3.171816644986573e-06, | |
| "loss": 0.9445, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09357400722021661, | |
| "grad_norm": 0.027132879942655563, | |
| "learning_rate": 3.18080836479775e-06, | |
| "loss": 0.9388, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.09472924187725631, | |
| "grad_norm": 0.07792849093675613, | |
| "learning_rate": 3.1896897539728615e-06, | |
| "loss": 0.9097, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09588447653429603, | |
| "grad_norm": 0.026650305837392807, | |
| "learning_rate": 3.1984634872934573e-06, | |
| "loss": 0.9216, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.09703971119133574, | |
| "grad_norm": 0.02636835351586342, | |
| "learning_rate": 3.2071321434364693e-06, | |
| "loss": 0.9397, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09819494584837545, | |
| "grad_norm": 0.025988014414906502, | |
| "learning_rate": 3.2156982095238214e-06, | |
| "loss": 0.9166, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09935018050541516, | |
| "grad_norm": 0.02622906304895878, | |
| "learning_rate": 3.2241640854059465e-06, | |
| "loss": 0.9213, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10050541516245487, | |
| "grad_norm": 0.02663242444396019, | |
| "learning_rate": 3.232532087697698e-06, | |
| "loss": 0.9292, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.10166064981949459, | |
| "grad_norm": 0.02621094323694706, | |
| "learning_rate": 3.2408044535836154e-06, | |
| "loss": 0.923, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.1028158844765343, | |
| "grad_norm": 0.02623113803565502, | |
| "learning_rate": 3.248983344408188e-06, | |
| "loss": 0.9252, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.10397111913357401, | |
| "grad_norm": 0.02616356685757637, | |
| "learning_rate": 3.257070849065542e-06, | |
| "loss": 0.9423, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.10512635379061372, | |
| "grad_norm": 0.025917142629623413, | |
| "learning_rate": 3.2650689872018227e-06, | |
| "loss": 0.9337, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.10628158844765342, | |
| "grad_norm": 0.02607305720448494, | |
| "learning_rate": 3.2729797122425927e-06, | |
| "loss": 0.9134, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10743682310469314, | |
| "grad_norm": 0.02566445618867874, | |
| "learning_rate": 3.280804914256559e-06, | |
| "loss": 0.9182, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.10859205776173285, | |
| "grad_norm": 0.025618452578783035, | |
| "learning_rate": 3.2885464226661647e-06, | |
| "loss": 0.883, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10974729241877257, | |
| "grad_norm": 0.02570994757115841, | |
| "learning_rate": 3.2962060088147467e-06, | |
| "loss": 0.9009, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.11090252707581227, | |
| "grad_norm": 0.02652270346879959, | |
| "learning_rate": 3.303785388399281e-06, | |
| "loss": 0.9264, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11205776173285198, | |
| "grad_norm": 0.026324449107050896, | |
| "learning_rate": 3.3112862237770753e-06, | |
| "loss": 0.9072, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1132129963898917, | |
| "grad_norm": 0.026178548112511635, | |
| "learning_rate": 3.318710126154159e-06, | |
| "loss": 0.9226, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1143682310469314, | |
| "grad_norm": 0.025392455980181694, | |
| "learning_rate": 3.3260586576625835e-06, | |
| "loss": 0.9002, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.11552346570397112, | |
| "grad_norm": 0.02630504220724106, | |
| "learning_rate": 3.333333333333334e-06, | |
| "loss": 0.9335, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11667870036101083, | |
| "grad_norm": 0.026046302169561386, | |
| "learning_rate": 3.340535622971072e-06, | |
| "loss": 0.9079, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.11783393501805055, | |
| "grad_norm": 0.025761395692825317, | |
| "learning_rate": 3.3476669529365297e-06, | |
| "loss": 0.9188, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.11898916967509025, | |
| "grad_norm": 0.025253284722566605, | |
| "learning_rate": 3.3547287078419544e-06, | |
| "loss": 0.9207, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.12014440433212996, | |
| "grad_norm": 0.025493199005723, | |
| "learning_rate": 3.361722232164634e-06, | |
| "loss": 0.8937, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12129963898916968, | |
| "grad_norm": 0.025204647332429886, | |
| "learning_rate": 3.3686488317832306e-06, | |
| "loss": 0.9189, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.12245487364620938, | |
| "grad_norm": 0.025601711124181747, | |
| "learning_rate": 3.375509775441284e-06, | |
| "loss": 0.9034, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.1236101083032491, | |
| "grad_norm": 0.025162257254123688, | |
| "learning_rate": 3.3823062961420163e-06, | |
| "loss": 0.9008, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.12476534296028881, | |
| "grad_norm": 0.025047749280929565, | |
| "learning_rate": 3.3890395924782498e-06, | |
| "loss": 0.8788, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.12592057761732853, | |
| "grad_norm": 0.026021016761660576, | |
| "learning_rate": 3.3957108299010395e-06, | |
| "loss": 0.9252, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.12707581227436823, | |
| "grad_norm": 0.025183433666825294, | |
| "learning_rate": 3.402321141930376e-06, | |
| "loss": 0.9221, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12823104693140794, | |
| "grad_norm": 0.02558548003435135, | |
| "learning_rate": 3.408871631311096e-06, | |
| "loss": 0.8957, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.12938628158844764, | |
| "grad_norm": 0.025612782686948776, | |
| "learning_rate": 3.415363371116969e-06, | |
| "loss": 0.8895, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.13054151624548738, | |
| "grad_norm": 0.024990661069750786, | |
| "learning_rate": 3.4217974058057e-06, | |
| "loss": 0.9094, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.13169675090252708, | |
| "grad_norm": 0.026293708011507988, | |
| "learning_rate": 3.428174752227455e-06, | |
| "loss": 0.9063, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.1328519855595668, | |
| "grad_norm": 0.02521086297929287, | |
| "learning_rate": 3.434496400589353e-06, | |
| "loss": 0.9164, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1340072202166065, | |
| "grad_norm": 0.026099544018507004, | |
| "learning_rate": 3.440763315378198e-06, | |
| "loss": 0.9125, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.1351624548736462, | |
| "grad_norm": 0.02469206601381302, | |
| "learning_rate": 3.446976436243603e-06, | |
| "loss": 0.892, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.13631768953068593, | |
| "grad_norm": 0.025795504450798035, | |
| "learning_rate": 3.4531366788435426e-06, | |
| "loss": 0.896, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.13747292418772564, | |
| "grad_norm": 0.024762745946645737, | |
| "learning_rate": 3.4592449356542185e-06, | |
| "loss": 0.8718, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.13862815884476534, | |
| "grad_norm": 0.02494460716843605, | |
| "learning_rate": 3.4653020767460416e-06, | |
| "loss": 0.9059, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13978339350180505, | |
| "grad_norm": 0.02601913921535015, | |
| "learning_rate": 3.471308950527417e-06, | |
| "loss": 0.9188, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.14093862815884475, | |
| "grad_norm": 0.025598011910915375, | |
| "learning_rate": 3.4772663844579142e-06, | |
| "loss": 0.9289, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1420938628158845, | |
| "grad_norm": 0.025548091158270836, | |
| "learning_rate": 3.48317518573233e-06, | |
| "loss": 0.9007, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.1432490974729242, | |
| "grad_norm": 0.02472161501646042, | |
| "learning_rate": 3.4890361419370587e-06, | |
| "loss": 0.8807, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.1444043321299639, | |
| "grad_norm": 0.025738514959812164, | |
| "learning_rate": 3.4948500216800947e-06, | |
| "loss": 0.8969, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1455595667870036, | |
| "grad_norm": 0.025138631463050842, | |
| "learning_rate": 3.5006175751959385e-06, | |
| "loss": 0.9001, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1467148014440433, | |
| "grad_norm": 0.025624800473451614, | |
| "learning_rate": 3.506339534926595e-06, | |
| "loss": 0.9093, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.14787003610108304, | |
| "grad_norm": 0.02503197453916073, | |
| "learning_rate": 3.512016616079781e-06, | |
| "loss": 0.8793, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14902527075812275, | |
| "grad_norm": 0.02515401318669319, | |
| "learning_rate": 3.5176495171654153e-06, | |
| "loss": 0.8855, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.15018050541516245, | |
| "grad_norm": 0.024870432913303375, | |
| "learning_rate": 3.523238920511395e-06, | |
| "loss": 0.8642, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.15133574007220216, | |
| "grad_norm": 0.02646622247993946, | |
| "learning_rate": 3.528785492759607e-06, | |
| "loss": 0.8966, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1524909747292419, | |
| "grad_norm": 0.025933699682354927, | |
| "learning_rate": 3.5342898853430833e-06, | |
| "loss": 0.9036, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.1536462093862816, | |
| "grad_norm": 0.025004137307405472, | |
| "learning_rate": 3.5397527349451433e-06, | |
| "loss": 0.8796, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.1548014440433213, | |
| "grad_norm": 0.027062473818659782, | |
| "learning_rate": 3.5451746639413463e-06, | |
| "loss": 0.8993, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.155956678700361, | |
| "grad_norm": 0.025223543867468834, | |
| "learning_rate": 3.550556280825011e-06, | |
| "loss": 0.8887, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1571119133574007, | |
| "grad_norm": 0.02600521221756935, | |
| "learning_rate": 3.55589818061703e-06, | |
| "loss": 0.8799, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.15826714801444045, | |
| "grad_norm": 0.026399623602628708, | |
| "learning_rate": 3.5612009452606784e-06, | |
| "loss": 0.9006, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.15942238267148015, | |
| "grad_norm": 0.025787660852074623, | |
| "learning_rate": 3.5664651440020615e-06, | |
| "loss": 0.9254, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.16057761732851986, | |
| "grad_norm": 0.025667186826467514, | |
| "learning_rate": 3.5716913337568255e-06, | |
| "loss": 0.8843, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.16173285198555956, | |
| "grad_norm": 0.025686215609312057, | |
| "learning_rate": 3.5768800594637304e-06, | |
| "loss": 0.8904, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.16288808664259927, | |
| "grad_norm": 0.027956154197454453, | |
| "learning_rate": 3.582031854425634e-06, | |
| "loss": 0.9226, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.164043321299639, | |
| "grad_norm": 0.025962915271520615, | |
| "learning_rate": 3.587147240638428e-06, | |
| "loss": 0.9165, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.1651985559566787, | |
| "grad_norm": 0.026672059670090675, | |
| "learning_rate": 3.5922267291084367e-06, | |
| "loss": 0.8984, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1663537906137184, | |
| "grad_norm": 0.025454359129071236, | |
| "learning_rate": 3.59727082015875e-06, | |
| "loss": 0.9018, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.16750902527075812, | |
| "grad_norm": 0.02756405621767044, | |
| "learning_rate": 3.6022800037249583e-06, | |
| "loss": 0.8928, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.16866425992779782, | |
| "grad_norm": 0.025225916877388954, | |
| "learning_rate": 3.607254759640729e-06, | |
| "loss": 0.887, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.16981949458483755, | |
| "grad_norm": 0.026238933205604553, | |
| "learning_rate": 3.612195557913627e-06, | |
| "loss": 0.8906, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.17097472924187726, | |
| "grad_norm": 0.026623785495758057, | |
| "learning_rate": 3.6171028589915957e-06, | |
| "loss": 0.8866, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.17212996389891697, | |
| "grad_norm": 0.02516297437250614, | |
| "learning_rate": 3.6219771140204575e-06, | |
| "loss": 0.875, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.17328519855595667, | |
| "grad_norm": 0.026302075013518333, | |
| "learning_rate": 3.626818765092802e-06, | |
| "loss": 0.9158, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.17444043321299638, | |
| "grad_norm": 0.02607031911611557, | |
| "learning_rate": 3.631628245488616e-06, | |
| "loss": 0.8595, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.1755956678700361, | |
| "grad_norm": 0.025099189952015877, | |
| "learning_rate": 3.6364059799079547e-06, | |
| "loss": 0.8762, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.17675090252707581, | |
| "grad_norm": 0.024900630116462708, | |
| "learning_rate": 3.6411523846959985e-06, | |
| "loss": 0.8769, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.17790613718411552, | |
| "grad_norm": 0.02502143569290638, | |
| "learning_rate": 3.6458678680607725e-06, | |
| "loss": 0.9109, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.17906137184115523, | |
| "grad_norm": 0.025041181594133377, | |
| "learning_rate": 3.6505528302838196e-06, | |
| "loss": 0.8624, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.18021660649819496, | |
| "grad_norm": 0.02584444358944893, | |
| "learning_rate": 3.655207663924103e-06, | |
| "loss": 0.8581, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.18137184115523466, | |
| "grad_norm": 0.024667399004101753, | |
| "learning_rate": 3.65983275401539e-06, | |
| "loss": 0.8699, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.18252707581227437, | |
| "grad_norm": 0.025528263300657272, | |
| "learning_rate": 3.664428478257371e-06, | |
| "loss": 0.8896, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.18368231046931408, | |
| "grad_norm": 0.025437112897634506, | |
| "learning_rate": 3.6689952072007528e-06, | |
| "loss": 0.8954, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.18483754512635378, | |
| "grad_norm": 0.025140732526779175, | |
| "learning_rate": 3.6735333044265414e-06, | |
| "loss": 0.8794, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1859927797833935, | |
| "grad_norm": 0.026126103475689888, | |
| "learning_rate": 3.6780431267197503e-06, | |
| "loss": 0.9103, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.18714801444043322, | |
| "grad_norm": 0.025869259610772133, | |
| "learning_rate": 3.6825250242377186e-06, | |
| "loss": 0.8945, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.18830324909747292, | |
| "grad_norm": 0.02550615929067135, | |
| "learning_rate": 3.6869793406732633e-06, | |
| "loss": 0.8884, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.18945848375451263, | |
| "grad_norm": 0.02724931389093399, | |
| "learning_rate": 3.69140641341283e-06, | |
| "loss": 0.893, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.19061371841155234, | |
| "grad_norm": 0.02519422210752964, | |
| "learning_rate": 3.6958065736898442e-06, | |
| "loss": 0.8739, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.19176895306859207, | |
| "grad_norm": 0.025711793452501297, | |
| "learning_rate": 3.700180146733426e-06, | |
| "loss": 0.8832, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.19292418772563177, | |
| "grad_norm": 0.02538181096315384, | |
| "learning_rate": 3.7045274519126395e-06, | |
| "loss": 0.8785, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.19407942238267148, | |
| "grad_norm": 0.0263076052069664, | |
| "learning_rate": 3.7088488028764387e-06, | |
| "loss": 0.8722, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.19523465703971118, | |
| "grad_norm": 0.02613968588411808, | |
| "learning_rate": 3.7131445076894563e-06, | |
| "loss": 0.8833, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.1963898916967509, | |
| "grad_norm": 0.024628346785902977, | |
| "learning_rate": 3.717414868963791e-06, | |
| "loss": 0.8795, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.19754512635379062, | |
| "grad_norm": 0.026414738968014717, | |
| "learning_rate": 3.721660183986924e-06, | |
| "loss": 0.9025, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.19870036101083033, | |
| "grad_norm": 0.025311259552836418, | |
| "learning_rate": 3.725880744845915e-06, | |
| "loss": 0.9047, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.19985559566787003, | |
| "grad_norm": 0.025876285508275032, | |
| "learning_rate": 3.7300768385479928e-06, | |
| "loss": 0.8571, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.20101083032490974, | |
| "grad_norm": 0.02702619880437851, | |
| "learning_rate": 3.7342487471376667e-06, | |
| "loss": 0.8776, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.20216606498194944, | |
| "grad_norm": 0.026685267686843872, | |
| "learning_rate": 3.7383967478104918e-06, | |
| "loss": 0.9018, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.20332129963898918, | |
| "grad_norm": 0.026544688269495964, | |
| "learning_rate": 3.7425211130235835e-06, | |
| "loss": 0.8861, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.20447653429602888, | |
| "grad_norm": 0.026582278311252594, | |
| "learning_rate": 3.7466221106030114e-06, | |
| "loss": 0.8974, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.2056317689530686, | |
| "grad_norm": 0.02530672959983349, | |
| "learning_rate": 3.7507000038481574e-06, | |
| "loss": 0.8904, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.2067870036101083, | |
| "grad_norm": 0.02869655378162861, | |
| "learning_rate": 3.7547550516331556e-06, | |
| "loss": 0.8637, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.20794223826714803, | |
| "grad_norm": 0.0269013699144125, | |
| "learning_rate": 3.7587875085055104e-06, | |
| "loss": 0.8718, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.20909747292418773, | |
| "grad_norm": 0.027417806908488274, | |
| "learning_rate": 3.762797624781975e-06, | |
| "loss": 0.8697, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.21025270758122744, | |
| "grad_norm": 0.025989564135670662, | |
| "learning_rate": 3.7667856466417917e-06, | |
| "loss": 0.8838, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.21140794223826714, | |
| "grad_norm": 0.026576591655611992, | |
| "learning_rate": 3.7707518162173835e-06, | |
| "loss": 0.8704, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.21256317689530685, | |
| "grad_norm": 0.0275897067040205, | |
| "learning_rate": 3.7746963716825613e-06, | |
| "loss": 0.855, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.21371841155234658, | |
| "grad_norm": 0.0267089381814003, | |
| "learning_rate": 3.778619547338356e-06, | |
| "loss": 0.8712, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2148736462093863, | |
| "grad_norm": 0.02677794173359871, | |
| "learning_rate": 3.7825215736965283e-06, | |
| "loss": 0.8729, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.216028880866426, | |
| "grad_norm": 0.0281060878187418, | |
| "learning_rate": 3.786402677560832e-06, | |
| "loss": 0.8845, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2171841155234657, | |
| "grad_norm": 0.025514084845781326, | |
| "learning_rate": 3.7902630821061337e-06, | |
| "loss": 0.8669, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2183393501805054, | |
| "grad_norm": 0.02548467181622982, | |
| "learning_rate": 3.7941030069554073e-06, | |
| "loss": 0.8897, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.21949458483754514, | |
| "grad_norm": 0.026287022978067398, | |
| "learning_rate": 3.7979226682547152e-06, | |
| "loss": 0.8731, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.22064981949458484, | |
| "grad_norm": 0.025090090930461884, | |
| "learning_rate": 3.8017222787462132e-06, | |
| "loss": 0.8946, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.22180505415162455, | |
| "grad_norm": 0.02495192363858223, | |
| "learning_rate": 3.8055020478392497e-06, | |
| "loss": 0.8567, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.22296028880866425, | |
| "grad_norm": 0.025306979194283485, | |
| "learning_rate": 3.8092621816796233e-06, | |
| "loss": 0.8819, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.22411552346570396, | |
| "grad_norm": 0.027035973966121674, | |
| "learning_rate": 3.813002883217044e-06, | |
| "loss": 0.8733, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2252707581227437, | |
| "grad_norm": 0.025134000927209854, | |
| "learning_rate": 3.816724352270864e-06, | |
| "loss": 0.8764, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2264259927797834, | |
| "grad_norm": 0.025370297953486443, | |
| "learning_rate": 3.820426785594127e-06, | |
| "loss": 0.8607, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2275812274368231, | |
| "grad_norm": 0.02679363079369068, | |
| "learning_rate": 3.824110376935989e-06, | |
| "loss": 0.8726, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2287364620938628, | |
| "grad_norm": 0.024456890299916267, | |
| "learning_rate": 3.827775317102552e-06, | |
| "loss": 0.858, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2298916967509025, | |
| "grad_norm": 0.026032116264104843, | |
| "learning_rate": 3.831421794016178e-06, | |
| "loss": 0.8606, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.23104693140794225, | |
| "grad_norm": 0.026059836149215698, | |
| "learning_rate": 3.835049992773302e-06, | |
| "loss": 0.8613, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23220216606498195, | |
| "grad_norm": 0.02430492639541626, | |
| "learning_rate": 3.8386600957008155e-06, | |
| "loss": 0.866, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.23335740072202166, | |
| "grad_norm": 0.026345418766140938, | |
| "learning_rate": 3.8422522824110405e-06, | |
| "loss": 0.8814, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.23451263537906136, | |
| "grad_norm": 0.025827715173363686, | |
| "learning_rate": 3.8458267298553554e-06, | |
| "loss": 0.8791, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2356678700361011, | |
| "grad_norm": 0.025157129392027855, | |
| "learning_rate": 3.849383612376498e-06, | |
| "loss": 0.8797, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2368231046931408, | |
| "grad_norm": 0.02571243792772293, | |
| "learning_rate": 3.852923101759591e-06, | |
| "loss": 0.8866, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2379783393501805, | |
| "grad_norm": 0.025041643530130386, | |
| "learning_rate": 3.856445367281923e-06, | |
| "loss": 0.8666, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2391335740072202, | |
| "grad_norm": 0.0259910486638546, | |
| "learning_rate": 3.8599505757615295e-06, | |
| "loss": 0.8727, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.24028880866425992, | |
| "grad_norm": 0.02511589229106903, | |
| "learning_rate": 3.863438891604603e-06, | |
| "loss": 0.8801, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.24144404332129965, | |
| "grad_norm": 0.024072881788015366, | |
| "learning_rate": 3.866910476851757e-06, | |
| "loss": 0.8203, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.24259927797833936, | |
| "grad_norm": 0.025221582502126694, | |
| "learning_rate": 3.870365491223199e-06, | |
| "loss": 0.8384, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.24375451263537906, | |
| "grad_norm": 0.02576667070388794, | |
| "learning_rate": 3.873804092162822e-06, | |
| "loss": 0.8423, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.24490974729241877, | |
| "grad_norm": 0.02616226114332676, | |
| "learning_rate": 3.877226434881253e-06, | |
| "loss": 0.8521, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.24606498194945847, | |
| "grad_norm": 0.024735651910305023, | |
| "learning_rate": 3.880632672397897e-06, | |
| "loss": 0.8847, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.2472202166064982, | |
| "grad_norm": 0.02537655271589756, | |
| "learning_rate": 3.884022955581984e-06, | |
| "loss": 0.86, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.2483754512635379, | |
| "grad_norm": 0.02478659339249134, | |
| "learning_rate": 3.887397433192676e-06, | |
| "loss": 0.8489, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.24953068592057762, | |
| "grad_norm": 0.025592437013983727, | |
| "learning_rate": 3.890756251918219e-06, | |
| "loss": 0.845, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.25068592057761735, | |
| "grad_norm": 0.02625833824276924, | |
| "learning_rate": 3.894099556414216e-06, | |
| "loss": 0.8584, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.25184115523465705, | |
| "grad_norm": 0.02509414032101631, | |
| "learning_rate": 3.897427489341009e-06, | |
| "loss": 0.8608, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.25299638989169676, | |
| "grad_norm": 0.02498655766248703, | |
| "learning_rate": 3.900740191400198e-06, | |
| "loss": 0.8509, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.25415162454873647, | |
| "grad_norm": 0.025928007438778877, | |
| "learning_rate": 3.9040378013703444e-06, | |
| "loss": 0.8556, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.25530685920577617, | |
| "grad_norm": 0.026060784235596657, | |
| "learning_rate": 3.907320456141851e-06, | |
| "loss": 0.8495, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.2564620938628159, | |
| "grad_norm": 0.02592143975198269, | |
| "learning_rate": 3.910588290751064e-06, | |
| "loss": 0.8527, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2576173285198556, | |
| "grad_norm": 0.025333942845463753, | |
| "learning_rate": 3.913841438413601e-06, | |
| "loss": 0.8398, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2587725631768953, | |
| "grad_norm": 0.026303421705961227, | |
| "learning_rate": 3.9170800305569385e-06, | |
| "loss": 0.8575, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.259927797833935, | |
| "grad_norm": 0.025790376588702202, | |
| "learning_rate": 3.920304196852272e-06, | |
| "loss": 0.8804, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.26108303249097475, | |
| "grad_norm": 0.02600492723286152, | |
| "learning_rate": 3.923514065245669e-06, | |
| "loss": 0.8532, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.26223826714801446, | |
| "grad_norm": 0.025459101423621178, | |
| "learning_rate": 3.9267097619885385e-06, | |
| "loss": 0.8597, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.26339350180505416, | |
| "grad_norm": 0.025850312784314156, | |
| "learning_rate": 3.9298914116674236e-06, | |
| "loss": 0.8605, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.26454873646209387, | |
| "grad_norm": 0.0261206217110157, | |
| "learning_rate": 3.9330591372331475e-06, | |
| "loss": 0.8463, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.2657039711191336, | |
| "grad_norm": 0.026850640773773193, | |
| "learning_rate": 3.936213060029322e-06, | |
| "loss": 0.8655, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2668592057761733, | |
| "grad_norm": 0.02615203708410263, | |
| "learning_rate": 3.9393532998202405e-06, | |
| "loss": 0.8729, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.268014440433213, | |
| "grad_norm": 0.026082858443260193, | |
| "learning_rate": 3.942479974818166e-06, | |
| "loss": 0.8626, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2691696750902527, | |
| "grad_norm": 0.02707120031118393, | |
| "learning_rate": 3.9455932017100315e-06, | |
| "loss": 0.8665, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.2703249097472924, | |
| "grad_norm": 0.02531982958316803, | |
| "learning_rate": 3.9486930956835726e-06, | |
| "loss": 0.841, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.27148014440433216, | |
| "grad_norm": 0.02577449567615986, | |
| "learning_rate": 3.951779770452894e-06, | |
| "loss": 0.8528, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.27263537906137186, | |
| "grad_norm": 0.025777166709303856, | |
| "learning_rate": 3.954853338283512e-06, | |
| "loss": 0.8636, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.27379061371841157, | |
| "grad_norm": 0.027185678482055664, | |
| "learning_rate": 3.95791391001684e-06, | |
| "loss": 0.884, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2749458483754513, | |
| "grad_norm": 0.025784213095903397, | |
| "learning_rate": 3.960961595094187e-06, | |
| "loss": 0.8532, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.276101083032491, | |
| "grad_norm": 0.025824446231126785, | |
| "learning_rate": 3.96399650158023e-06, | |
| "loss": 0.8555, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.2772563176895307, | |
| "grad_norm": 0.026414718478918076, | |
| "learning_rate": 3.96701873618601e-06, | |
| "loss": 0.8634, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2784115523465704, | |
| "grad_norm": 0.02620028331875801, | |
| "learning_rate": 3.970028404291448e-06, | |
| "loss": 0.8574, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2795667870036101, | |
| "grad_norm": 0.025768935680389404, | |
| "learning_rate": 3.973025609967386e-06, | |
| "loss": 0.8741, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2807220216606498, | |
| "grad_norm": 0.027099501341581345, | |
| "learning_rate": 3.976010455997187e-06, | |
| "loss": 0.8705, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.2818772563176895, | |
| "grad_norm": 0.02535291016101837, | |
| "learning_rate": 3.978983043897884e-06, | |
| "loss": 0.853, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.28303249097472927, | |
| "grad_norm": 0.02731909230351448, | |
| "learning_rate": 3.981943473940888e-06, | |
| "loss": 0.8776, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.284187725631769, | |
| "grad_norm": 0.02630050666630268, | |
| "learning_rate": 3.984891845172299e-06, | |
| "loss": 0.86, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.2853429602888087, | |
| "grad_norm": 0.025558151304721832, | |
| "learning_rate": 3.987828255432777e-06, | |
| "loss": 0.8758, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2864981949458484, | |
| "grad_norm": 0.026159491389989853, | |
| "learning_rate": 3.990752801377028e-06, | |
| "loss": 0.8626, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2876534296028881, | |
| "grad_norm": 0.02615942806005478, | |
| "learning_rate": 3.993665578492894e-06, | |
| "loss": 0.858, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.2888086642599278, | |
| "grad_norm": 0.025161130353808403, | |
| "learning_rate": 3.996566681120062e-06, | |
| "loss": 0.8429, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2899638989169675, | |
| "grad_norm": 0.02635674737393856, | |
| "learning_rate": 3.999456202468397e-06, | |
| "loss": 0.8686, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.2911191335740072, | |
| "grad_norm": 0.025895683094859123, | |
| "learning_rate": 4.0023342346359075e-06, | |
| "loss": 0.8422, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2922743682310469, | |
| "grad_norm": 0.027750222012400627, | |
| "learning_rate": 4.0052008686263635e-06, | |
| "loss": 0.8868, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2934296028880866, | |
| "grad_norm": 0.02600325271487236, | |
| "learning_rate": 4.0080561943665644e-06, | |
| "loss": 0.8639, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2945848375451264, | |
| "grad_norm": 0.02592737227678299, | |
| "learning_rate": 4.010900300723259e-06, | |
| "loss": 0.8453, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2957400722021661, | |
| "grad_norm": 0.025766368955373764, | |
| "learning_rate": 4.0137332755197495e-06, | |
| "loss": 0.8771, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.2968953068592058, | |
| "grad_norm": 0.026158476248383522, | |
| "learning_rate": 4.016555205552159e-06, | |
| "loss": 0.8524, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.2980505415162455, | |
| "grad_norm": 0.024570690467953682, | |
| "learning_rate": 4.019366176605384e-06, | |
| "loss": 0.8345, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2992057761732852, | |
| "grad_norm": 0.025776837021112442, | |
| "learning_rate": 4.022166273468753e-06, | |
| "loss": 0.8441, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.3003610108303249, | |
| "grad_norm": 0.02488660253584385, | |
| "learning_rate": 4.024955579951363e-06, | |
| "loss": 0.8639, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3015162454873646, | |
| "grad_norm": 0.025739185512065887, | |
| "learning_rate": 4.0277341788971355e-06, | |
| "loss": 0.8465, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.3026714801444043, | |
| "grad_norm": 0.02594810351729393, | |
| "learning_rate": 4.030502152199576e-06, | |
| "loss": 0.8752, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.303826714801444, | |
| "grad_norm": 0.025780096650123596, | |
| "learning_rate": 4.033259580816264e-06, | |
| "loss": 0.8433, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.3049819494584838, | |
| "grad_norm": 0.025661000981926918, | |
| "learning_rate": 4.036006544783052e-06, | |
| "loss": 0.8769, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3061371841155235, | |
| "grad_norm": 0.025767603889107704, | |
| "learning_rate": 4.0387431232280135e-06, | |
| "loss": 0.8705, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.3072924187725632, | |
| "grad_norm": 0.0245257169008255, | |
| "learning_rate": 4.041469394385113e-06, | |
| "loss": 0.8497, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.3084476534296029, | |
| "grad_norm": 0.026073751971125603, | |
| "learning_rate": 4.044185435607626e-06, | |
| "loss": 0.8503, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.3096028880866426, | |
| "grad_norm": 0.025399446487426758, | |
| "learning_rate": 4.046891323381315e-06, | |
| "loss": 0.8594, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.3107581227436823, | |
| "grad_norm": 0.02536724880337715, | |
| "learning_rate": 4.049587133337347e-06, | |
| "loss": 0.8513, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.311913357400722, | |
| "grad_norm": 0.024877896532416344, | |
| "learning_rate": 4.05227294026498e-06, | |
| "loss": 0.8302, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3130685920577617, | |
| "grad_norm": 0.026714155450463295, | |
| "learning_rate": 4.05494881812401e-06, | |
| "loss": 0.8787, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.3142238267148014, | |
| "grad_norm": 0.02595258131623268, | |
| "learning_rate": 4.057614840056999e-06, | |
| "loss": 0.8572, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.31537906137184113, | |
| "grad_norm": 0.02663499116897583, | |
| "learning_rate": 4.060271078401261e-06, | |
| "loss": 0.8656, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.3165342960288809, | |
| "grad_norm": 0.02580447867512703, | |
| "learning_rate": 4.0629176047006474e-06, | |
| "loss": 0.8574, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3176895306859206, | |
| "grad_norm": 0.02633056230843067, | |
| "learning_rate": 4.065554489717105e-06, | |
| "loss": 0.864, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3188447653429603, | |
| "grad_norm": 0.026197999715805054, | |
| "learning_rate": 4.06818180344203e-06, | |
| "loss": 0.8492, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.025230281054973602, | |
| "learning_rate": 4.070799615107415e-06, | |
| "loss": 0.8255, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3211552346570397, | |
| "grad_norm": 0.02663682959973812, | |
| "learning_rate": 4.073407993196794e-06, | |
| "loss": 0.8557, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3223104693140794, | |
| "grad_norm": 0.025046920403838158, | |
| "learning_rate": 4.076007005455996e-06, | |
| "loss": 0.8433, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.3234657039711191, | |
| "grad_norm": 0.025554031133651733, | |
| "learning_rate": 4.078596718903699e-06, | |
| "loss": 0.8639, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.32462093862815883, | |
| "grad_norm": 0.025266235694289207, | |
| "learning_rate": 4.0811771998418e-06, | |
| "loss": 0.8617, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.32577617328519853, | |
| "grad_norm": 0.026751738041639328, | |
| "learning_rate": 4.083748513865602e-06, | |
| "loss": 0.8472, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3269314079422383, | |
| "grad_norm": 0.026282917708158493, | |
| "learning_rate": 4.086310725873818e-06, | |
| "loss": 0.8518, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.328086642599278, | |
| "grad_norm": 0.024683093652129173, | |
| "learning_rate": 4.088863900078397e-06, | |
| "loss": 0.856, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3292418772563177, | |
| "grad_norm": 0.025772644206881523, | |
| "learning_rate": 4.091408100014184e-06, | |
| "loss": 0.849, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3303971119133574, | |
| "grad_norm": 0.02520694211125374, | |
| "learning_rate": 4.093943388548406e-06, | |
| "loss": 0.8363, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3315523465703971, | |
| "grad_norm": 0.026992499828338623, | |
| "learning_rate": 4.096469827889988e-06, | |
| "loss": 0.859, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3327075812274368, | |
| "grad_norm": 0.02580207772552967, | |
| "learning_rate": 4.0989874795987185e-06, | |
| "loss": 0.873, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3338628158844765, | |
| "grad_norm": 0.025334378704428673, | |
| "learning_rate": 4.101496404594247e-06, | |
| "loss": 0.848, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.33501805054151623, | |
| "grad_norm": 0.027060015127062798, | |
| "learning_rate": 4.103996663164927e-06, | |
| "loss": 0.834, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.33617328519855594, | |
| "grad_norm": 0.027299024164676666, | |
| "learning_rate": 4.106488314976513e-06, | |
| "loss": 0.8667, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.33732851985559564, | |
| "grad_norm": 0.027205299586057663, | |
| "learning_rate": 4.108971419080698e-06, | |
| "loss": 0.8548, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.3384837545126354, | |
| "grad_norm": 0.025082072243094444, | |
| "learning_rate": 4.111446033923516e-06, | |
| "loss": 0.8418, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3396389891696751, | |
| "grad_norm": 0.025831829756498337, | |
| "learning_rate": 4.113912217353596e-06, | |
| "loss": 0.8419, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3407942238267148, | |
| "grad_norm": 0.02547086775302887, | |
| "learning_rate": 4.1163700266302726e-06, | |
| "loss": 0.8708, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3419494584837545, | |
| "grad_norm": 0.02637997455894947, | |
| "learning_rate": 4.118819518431564e-06, | |
| "loss": 0.8492, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3431046931407942, | |
| "grad_norm": 0.02541852556169033, | |
| "learning_rate": 4.121260748862021e-06, | |
| "loss": 0.8607, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.34425992779783393, | |
| "grad_norm": 0.026499446481466293, | |
| "learning_rate": 4.123693773460426e-06, | |
| "loss": 0.8543, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.34541516245487364, | |
| "grad_norm": 0.025943726301193237, | |
| "learning_rate": 4.126118647207383e-06, | |
| "loss": 0.852, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.34657039711191334, | |
| "grad_norm": 0.024514541029930115, | |
| "learning_rate": 4.128535424532771e-06, | |
| "loss": 0.8335, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.34772563176895305, | |
| "grad_norm": 0.026036018505692482, | |
| "learning_rate": 4.130944159323072e-06, | |
| "loss": 0.8373, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.34888086642599275, | |
| "grad_norm": 0.0261723380535841, | |
| "learning_rate": 4.133344904928585e-06, | |
| "loss": 0.839, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3500361010830325, | |
| "grad_norm": 0.025829095393419266, | |
| "learning_rate": 4.135737714170509e-06, | |
| "loss": 0.8691, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3511913357400722, | |
| "grad_norm": 0.026276499032974243, | |
| "learning_rate": 4.138122639347924e-06, | |
| "loss": 0.839, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3523465703971119, | |
| "grad_norm": 0.026492850854992867, | |
| "learning_rate": 4.140499732244644e-06, | |
| "loss": 0.8585, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.35350180505415163, | |
| "grad_norm": 0.027172986418008804, | |
| "learning_rate": 4.142869044135967e-06, | |
| "loss": 0.8369, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.35465703971119134, | |
| "grad_norm": 0.026907015591859818, | |
| "learning_rate": 4.145230625795312e-06, | |
| "loss": 0.8578, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.35581227436823104, | |
| "grad_norm": 0.02687431313097477, | |
| "learning_rate": 4.147584527500741e-06, | |
| "loss": 0.8727, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.35696750902527075, | |
| "grad_norm": 0.026860354468226433, | |
| "learning_rate": 4.149930799041391e-06, | |
| "loss": 0.8026, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.35812274368231045, | |
| "grad_norm": 0.02500147931277752, | |
| "learning_rate": 4.152269489723789e-06, | |
| "loss": 0.8309, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.35927797833935016, | |
| "grad_norm": 0.026802562177181244, | |
| "learning_rate": 4.154600648378063e-06, | |
| "loss": 0.8524, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.3604332129963899, | |
| "grad_norm": 0.026412485167384148, | |
| "learning_rate": 4.156924323364072e-06, | |
| "loss": 0.8485, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3615884476534296, | |
| "grad_norm": 0.027881808578968048, | |
| "learning_rate": 4.159240562577414e-06, | |
| "loss": 0.8654, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.36274368231046933, | |
| "grad_norm": 0.026209615170955658, | |
| "learning_rate": 4.1615494134553584e-06, | |
| "loss": 0.8492, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.36389891696750903, | |
| "grad_norm": 0.026174476370215416, | |
| "learning_rate": 4.163850922982668e-06, | |
| "loss": 0.818, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.36505415162454874, | |
| "grad_norm": 0.02707611583173275, | |
| "learning_rate": 4.166145137697341e-06, | |
| "loss": 0.8586, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.36620938628158844, | |
| "grad_norm": 0.02789073996245861, | |
| "learning_rate": 4.1684321036962525e-06, | |
| "loss": 0.8468, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.36736462093862815, | |
| "grad_norm": 0.02748328261077404, | |
| "learning_rate": 4.170711866640721e-06, | |
| "loss": 0.8688, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.36851985559566786, | |
| "grad_norm": 0.029765894636511803, | |
| "learning_rate": 4.172984471761969e-06, | |
| "loss": 0.8521, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.36967509025270756, | |
| "grad_norm": 0.025119414553046227, | |
| "learning_rate": 4.17524996386651e-06, | |
| "loss": 0.829, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.37083032490974727, | |
| "grad_norm": 0.028743257746100426, | |
| "learning_rate": 4.177508387341454e-06, | |
| "loss": 0.8676, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.371985559566787, | |
| "grad_norm": 0.02732737548649311, | |
| "learning_rate": 4.179759786159719e-06, | |
| "loss": 0.8516, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.37314079422382673, | |
| "grad_norm": 0.026275500655174255, | |
| "learning_rate": 4.182004203885172e-06, | |
| "loss": 0.8606, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.37429602888086644, | |
| "grad_norm": 0.028016693890094757, | |
| "learning_rate": 4.1842416836776876e-06, | |
| "loss": 0.8272, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.37545126353790614, | |
| "grad_norm": 0.026512511074543, | |
| "learning_rate": 4.1864722682981245e-06, | |
| "loss": 0.8438, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.37660649819494585, | |
| "grad_norm": 0.026299525052309036, | |
| "learning_rate": 4.188696000113232e-06, | |
| "loss": 0.8485, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.37776173285198555, | |
| "grad_norm": 0.02613169699907303, | |
| "learning_rate": 4.190912921100477e-06, | |
| "loss": 0.8184, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.37891696750902526, | |
| "grad_norm": 0.02610696479678154, | |
| "learning_rate": 4.193123072852799e-06, | |
| "loss": 0.8609, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.38007220216606497, | |
| "grad_norm": 0.026195967569947243, | |
| "learning_rate": 4.1953264965832905e-06, | |
| "loss": 0.8302, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.38122743682310467, | |
| "grad_norm": 0.026481349021196365, | |
| "learning_rate": 4.197523233129813e-06, | |
| "loss": 0.8491, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.38238267148014443, | |
| "grad_norm": 0.024858448654413223, | |
| "learning_rate": 4.199713322959531e-06, | |
| "loss": 0.8404, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.38353790613718414, | |
| "grad_norm": 0.026140978559851646, | |
| "learning_rate": 4.201896806173394e-06, | |
| "loss": 0.8558, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.38469314079422384, | |
| "grad_norm": 0.026811236515641212, | |
| "learning_rate": 4.204073722510534e-06, | |
| "loss": 0.8747, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.38584837545126355, | |
| "grad_norm": 0.0256299190223217, | |
| "learning_rate": 4.206244111352608e-06, | |
| "loss": 0.8521, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.38700361010830325, | |
| "grad_norm": 0.026677099987864494, | |
| "learning_rate": 4.208408011728075e-06, | |
| "loss": 0.8548, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.38815884476534296, | |
| "grad_norm": 0.026028303429484367, | |
| "learning_rate": 4.210565462316407e-06, | |
| "loss": 0.8474, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.38931407942238266, | |
| "grad_norm": 0.02715076506137848, | |
| "learning_rate": 4.212716501452232e-06, | |
| "loss": 0.8318, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.39046931407942237, | |
| "grad_norm": 0.02765047550201416, | |
| "learning_rate": 4.214861167129425e-06, | |
| "loss": 0.8558, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.3916245487364621, | |
| "grad_norm": 0.025798538699746132, | |
| "learning_rate": 4.216999497005137e-06, | |
| "loss": 0.8436, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.3927797833935018, | |
| "grad_norm": 0.028038429096341133, | |
| "learning_rate": 4.219131528403759e-06, | |
| "loss": 0.845, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.39393501805054154, | |
| "grad_norm": 0.026474367827177048, | |
| "learning_rate": 4.22125729832083e-06, | |
| "loss": 0.8411, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.39509025270758125, | |
| "grad_norm": 0.025686634704470634, | |
| "learning_rate": 4.223376843426892e-06, | |
| "loss": 0.8485, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.39624548736462095, | |
| "grad_norm": 0.026812469586730003, | |
| "learning_rate": 4.225490200071284e-06, | |
| "loss": 0.8571, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.39740072202166066, | |
| "grad_norm": 0.02608347311615944, | |
| "learning_rate": 4.227597404285883e-06, | |
| "loss": 0.8426, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.39855595667870036, | |
| "grad_norm": 0.02631702460348606, | |
| "learning_rate": 4.229698491788791e-06, | |
| "loss": 0.876, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.39971119133574007, | |
| "grad_norm": 0.0272465068846941, | |
| "learning_rate": 4.231793497987962e-06, | |
| "loss": 0.861, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.4008664259927798, | |
| "grad_norm": 0.02596902847290039, | |
| "learning_rate": 4.233882457984791e-06, | |
| "loss": 0.8645, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.4020216606498195, | |
| "grad_norm": 0.02580340951681137, | |
| "learning_rate": 4.235965406577636e-06, | |
| "loss": 0.8447, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.4031768953068592, | |
| "grad_norm": 0.027888286858797073, | |
| "learning_rate": 4.2380423782653e-06, | |
| "loss": 0.8425, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.4043321299638989, | |
| "grad_norm": 0.025501729920506477, | |
| "learning_rate": 4.2401134072504595e-06, | |
| "loss": 0.8307, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.40548736462093865, | |
| "grad_norm": 0.026491057127714157, | |
| "learning_rate": 4.24217852744304e-06, | |
| "loss": 0.8237, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.40664259927797836, | |
| "grad_norm": 0.027327535673975945, | |
| "learning_rate": 4.244237772463552e-06, | |
| "loss": 0.8605, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.40779783393501806, | |
| "grad_norm": 0.026969779282808304, | |
| "learning_rate": 4.246291175646372e-06, | |
| "loss": 0.8357, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.40895306859205777, | |
| "grad_norm": 0.02584182471036911, | |
| "learning_rate": 4.2483387700429804e-06, | |
| "loss": 0.8348, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.41010830324909747, | |
| "grad_norm": 0.025704173371195793, | |
| "learning_rate": 4.2503805884251575e-06, | |
| "loss": 0.8392, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.4112635379061372, | |
| "grad_norm": 0.026744280010461807, | |
| "learning_rate": 4.252416663288126e-06, | |
| "loss": 0.8405, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4124187725631769, | |
| "grad_norm": 0.02655099704861641, | |
| "learning_rate": 4.2544470268536555e-06, | |
| "loss": 0.8374, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.4135740072202166, | |
| "grad_norm": 0.02521936595439911, | |
| "learning_rate": 4.2564717110731246e-06, | |
| "loss": 0.8442, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.4147292418772563, | |
| "grad_norm": 0.02607394941151142, | |
| "learning_rate": 4.258490747630532e-06, | |
| "loss": 0.8273, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.41588447653429605, | |
| "grad_norm": 0.027000917121767998, | |
| "learning_rate": 4.26050416794548e-06, | |
| "loss": 0.8664, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.41703971119133576, | |
| "grad_norm": 0.026261812075972557, | |
| "learning_rate": 4.262512003176097e-06, | |
| "loss": 0.8351, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.41819494584837547, | |
| "grad_norm": 0.025927625596523285, | |
| "learning_rate": 4.264514284221944e-06, | |
| "loss": 0.8419, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.41935018050541517, | |
| "grad_norm": 0.02672671154141426, | |
| "learning_rate": 4.2665110417268545e-06, | |
| "loss": 0.84, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.4205054151624549, | |
| "grad_norm": 0.02695775218307972, | |
| "learning_rate": 4.26850230608176e-06, | |
| "loss": 0.8483, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4216606498194946, | |
| "grad_norm": 0.026013897731900215, | |
| "learning_rate": 4.270488107427459e-06, | |
| "loss": 0.8371, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.4228158844765343, | |
| "grad_norm": 0.026113096624612808, | |
| "learning_rate": 4.272468475657351e-06, | |
| "loss": 0.8448, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.423971119133574, | |
| "grad_norm": 0.02611883170902729, | |
| "learning_rate": 4.27444344042015e-06, | |
| "loss": 0.8522, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.4251263537906137, | |
| "grad_norm": 0.02588835172355175, | |
| "learning_rate": 4.27641303112253e-06, | |
| "loss": 0.8348, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4262815884476534, | |
| "grad_norm": 0.026020022109150887, | |
| "learning_rate": 4.278377276931768e-06, | |
| "loss": 0.8376, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.42743682310469316, | |
| "grad_norm": 0.026923442259430885, | |
| "learning_rate": 4.280336206778326e-06, | |
| "loss": 0.8435, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.42859205776173287, | |
| "grad_norm": 0.026913795620203018, | |
| "learning_rate": 4.2822898493584105e-06, | |
| "loss": 0.8374, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.4297472924187726, | |
| "grad_norm": 0.025143541395664215, | |
| "learning_rate": 4.2842382331364965e-06, | |
| "loss": 0.8169, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.4309025270758123, | |
| "grad_norm": 0.027908481657505035, | |
| "learning_rate": 4.2861813863478135e-06, | |
| "loss": 0.8561, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.432057761732852, | |
| "grad_norm": 0.026535294950008392, | |
| "learning_rate": 4.288119337000801e-06, | |
| "loss": 0.8299, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4332129963898917, | |
| "grad_norm": 0.027421629056334496, | |
| "learning_rate": 4.290052112879532e-06, | |
| "loss": 0.8273, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4343682310469314, | |
| "grad_norm": 0.02671075239777565, | |
| "learning_rate": 4.291979741546102e-06, | |
| "loss": 0.8215, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4355234657039711, | |
| "grad_norm": 0.025190116837620735, | |
| "learning_rate": 4.293902250342989e-06, | |
| "loss": 0.871, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4366787003610108, | |
| "grad_norm": 0.026498563587665558, | |
| "learning_rate": 4.295819666395376e-06, | |
| "loss": 0.8582, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.43783393501805057, | |
| "grad_norm": 0.02722996659576893, | |
| "learning_rate": 4.297732016613455e-06, | |
| "loss": 0.8288, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4389891696750903, | |
| "grad_norm": 0.028074050322175026, | |
| "learning_rate": 4.299639327694684e-06, | |
| "loss": 0.8574, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.44014440433213, | |
| "grad_norm": 0.026562221348285675, | |
| "learning_rate": 4.301541626126033e-06, | |
| "loss": 0.8337, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.4412996389891697, | |
| "grad_norm": 0.025497550144791603, | |
| "learning_rate": 4.303438938186182e-06, | |
| "loss": 0.8388, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.4424548736462094, | |
| "grad_norm": 0.028150785714387894, | |
| "learning_rate": 4.305331289947705e-06, | |
| "loss": 0.8381, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.4436101083032491, | |
| "grad_norm": 0.024889659136533737, | |
| "learning_rate": 4.307218707279219e-06, | |
| "loss": 0.8297, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4447653429602888, | |
| "grad_norm": 0.02572575956583023, | |
| "learning_rate": 4.309101215847502e-06, | |
| "loss": 0.8464, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4459205776173285, | |
| "grad_norm": 0.028156662359833717, | |
| "learning_rate": 4.310978841119592e-06, | |
| "loss": 0.8307, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.4470758122743682, | |
| "grad_norm": 0.02584012597799301, | |
| "learning_rate": 4.312851608364853e-06, | |
| "loss": 0.8342, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.4482310469314079, | |
| "grad_norm": 0.02681022137403488, | |
| "learning_rate": 4.3147195426570124e-06, | |
| "loss": 0.839, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.4493862815884477, | |
| "grad_norm": 0.028267353773117065, | |
| "learning_rate": 4.31658266887618e-06, | |
| "loss": 0.8602, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4505415162454874, | |
| "grad_norm": 0.026196565479040146, | |
| "learning_rate": 4.318441011710832e-06, | |
| "loss": 0.8374, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4516967509025271, | |
| "grad_norm": 0.028164999559521675, | |
| "learning_rate": 4.3202945956597785e-06, | |
| "loss": 0.8458, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.4528519855595668, | |
| "grad_norm": 0.026657233014702797, | |
| "learning_rate": 4.322143445034095e-06, | |
| "loss": 0.8238, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4540072202166065, | |
| "grad_norm": 0.025035889819264412, | |
| "learning_rate": 4.323987583959045e-06, | |
| "loss": 0.8371, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.4551624548736462, | |
| "grad_norm": 0.02667805179953575, | |
| "learning_rate": 4.325827036375958e-06, | |
| "loss": 0.8201, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4563176895306859, | |
| "grad_norm": 0.028383539989590645, | |
| "learning_rate": 4.327661826044101e-06, | |
| "loss": 0.8385, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4574729241877256, | |
| "grad_norm": 0.025015531107783318, | |
| "learning_rate": 4.329491976542521e-06, | |
| "loss": 0.8013, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4586281588447653, | |
| "grad_norm": 0.0283355675637722, | |
| "learning_rate": 4.3313175112718595e-06, | |
| "loss": 0.8228, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.459783393501805, | |
| "grad_norm": 0.026209138333797455, | |
| "learning_rate": 4.333138453456147e-06, | |
| "loss": 0.8336, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4609386281588448, | |
| "grad_norm": 0.024707714095711708, | |
| "learning_rate": 4.334954826144581e-06, | |
| "loss": 0.8417, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.4620938628158845, | |
| "grad_norm": 0.02748985029757023, | |
| "learning_rate": 4.336766652213271e-06, | |
| "loss": 0.843, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4632490974729242, | |
| "grad_norm": 0.02513727732002735, | |
| "learning_rate": 4.3385739543669715e-06, | |
| "loss": 0.8301, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.4644043321299639, | |
| "grad_norm": 0.028213754296302795, | |
| "learning_rate": 4.340376755140784e-06, | |
| "loss": 0.8497, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4655595667870036, | |
| "grad_norm": 0.02582547254860401, | |
| "learning_rate": 4.34217507690185e-06, | |
| "loss": 0.8252, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4667148014440433, | |
| "grad_norm": 0.028116989880800247, | |
| "learning_rate": 4.343968941851009e-06, | |
| "loss": 0.8312, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.467870036101083, | |
| "grad_norm": 0.026428736746311188, | |
| "learning_rate": 4.345758372024448e-06, | |
| "loss": 0.8177, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4690252707581227, | |
| "grad_norm": 0.025875557214021683, | |
| "learning_rate": 4.347543389295324e-06, | |
| "loss": 0.8258, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.47018050541516243, | |
| "grad_norm": 0.025924712419509888, | |
| "learning_rate": 4.3493240153753665e-06, | |
| "loss": 0.8379, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4713357400722022, | |
| "grad_norm": 0.025729961693286896, | |
| "learning_rate": 4.3511002718164665e-06, | |
| "loss": 0.8396, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4724909747292419, | |
| "grad_norm": 0.026374271139502525, | |
| "learning_rate": 4.352872180012237e-06, | |
| "loss": 0.8381, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.4736462093862816, | |
| "grad_norm": 0.028471384197473526, | |
| "learning_rate": 4.35463976119956e-06, | |
| "loss": 0.8333, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4748014440433213, | |
| "grad_norm": 0.025666672736406326, | |
| "learning_rate": 4.356403036460116e-06, | |
| "loss": 0.8191, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.475956678700361, | |
| "grad_norm": 0.026298929005861282, | |
| "learning_rate": 4.358162026721892e-06, | |
| "loss": 0.8225, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.4771119133574007, | |
| "grad_norm": 0.027312815189361572, | |
| "learning_rate": 4.359916752760669e-06, | |
| "loss": 0.804, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.4782671480144404, | |
| "grad_norm": 0.02659662254154682, | |
| "learning_rate": 4.361667235201499e-06, | |
| "loss": 0.8286, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.47942238267148013, | |
| "grad_norm": 0.02585594914853573, | |
| "learning_rate": 4.363413494520154e-06, | |
| "loss": 0.8229, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.48057761732851983, | |
| "grad_norm": 0.027580194175243378, | |
| "learning_rate": 4.365155551044572e-06, | |
| "loss": 0.8397, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.48173285198555954, | |
| "grad_norm": 0.025151105597615242, | |
| "learning_rate": 4.366893424956263e-06, | |
| "loss": 0.8356, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.4828880866425993, | |
| "grad_norm": 0.025705143809318542, | |
| "learning_rate": 4.368627136291726e-06, | |
| "loss": 0.8342, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.484043321299639, | |
| "grad_norm": 0.027614813297986984, | |
| "learning_rate": 4.370356704943825e-06, | |
| "loss": 0.8481, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.4851985559566787, | |
| "grad_norm": 0.026671946048736572, | |
| "learning_rate": 4.372082150663167e-06, | |
| "loss": 0.8375, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4863537906137184, | |
| "grad_norm": 0.026453910395503044, | |
| "learning_rate": 4.3738034930594475e-06, | |
| "loss": 0.8259, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4875090252707581, | |
| "grad_norm": 0.027380600571632385, | |
| "learning_rate": 4.3755207516027904e-06, | |
| "loss": 0.8394, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.48866425992779783, | |
| "grad_norm": 0.02569480426609516, | |
| "learning_rate": 4.3772339456250705e-06, | |
| "loss": 0.82, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.48981949458483753, | |
| "grad_norm": 0.028223766013979912, | |
| "learning_rate": 4.3789430943212215e-06, | |
| "loss": 0.8223, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.49097472924187724, | |
| "grad_norm": 0.025967473164200783, | |
| "learning_rate": 4.38064821675052e-06, | |
| "loss": 0.8316, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.49212996389891694, | |
| "grad_norm": 0.026815980672836304, | |
| "learning_rate": 4.382349331837865e-06, | |
| "loss": 0.8497, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4932851985559567, | |
| "grad_norm": 0.026486823335289955, | |
| "learning_rate": 4.38404645837504e-06, | |
| "loss": 0.8224, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.4944404332129964, | |
| "grad_norm": 0.026307394728064537, | |
| "learning_rate": 4.385739615021954e-06, | |
| "loss": 0.8055, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4955956678700361, | |
| "grad_norm": 0.02565447799861431, | |
| "learning_rate": 4.387428820307874e-06, | |
| "loss": 0.85, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4967509025270758, | |
| "grad_norm": 0.026436088606715202, | |
| "learning_rate": 4.389114092632645e-06, | |
| "loss": 0.8343, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4979061371841155, | |
| "grad_norm": 0.028431309387087822, | |
| "learning_rate": 4.390795450267887e-06, | |
| "loss": 0.8457, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.49906137184115523, | |
| "grad_norm": 0.026016119867563248, | |
| "learning_rate": 4.392472911358188e-06, | |
| "loss": 0.8307, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.500216606498195, | |
| "grad_norm": 0.02676302008330822, | |
| "learning_rate": 4.394146493922276e-06, | |
| "loss": 0.8565, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.5013718411552347, | |
| "grad_norm": 0.026580410078167915, | |
| "learning_rate": 4.395816215854185e-06, | |
| "loss": 0.8364, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5025270758122744, | |
| "grad_norm": 0.02553938329219818, | |
| "learning_rate": 4.397482094924396e-06, | |
| "loss": 0.8266, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.5036823104693141, | |
| "grad_norm": 0.02656836435198784, | |
| "learning_rate": 4.3991441487809775e-06, | |
| "loss": 0.8181, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5048375451263538, | |
| "grad_norm": 0.025670040398836136, | |
| "learning_rate": 4.400802394950703e-06, | |
| "loss": 0.7895, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.5059927797833935, | |
| "grad_norm": 0.02619147300720215, | |
| "learning_rate": 4.402456850840167e-06, | |
| "loss": 0.8364, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5071480144404332, | |
| "grad_norm": 0.025901198387145996, | |
| "learning_rate": 4.4041075337368695e-06, | |
| "loss": 0.8449, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.5083032490974729, | |
| "grad_norm": 0.02623150125145912, | |
| "learning_rate": 4.405754460810312e-06, | |
| "loss": 0.8422, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5094584837545126, | |
| "grad_norm": 0.025403983891010284, | |
| "learning_rate": 4.407397649113065e-06, | |
| "loss": 0.851, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.5106137184115523, | |
| "grad_norm": 0.027808185666799545, | |
| "learning_rate": 4.40903711558182e-06, | |
| "loss": 0.8238, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.511768953068592, | |
| "grad_norm": 0.025581343099474907, | |
| "learning_rate": 4.41067287703845e-06, | |
| "loss": 0.848, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.5129241877256318, | |
| "grad_norm": 0.026919787749648094, | |
| "learning_rate": 4.4123049501910335e-06, | |
| "loss": 0.8171, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5140794223826715, | |
| "grad_norm": 0.025836626067757607, | |
| "learning_rate": 4.4139333516348865e-06, | |
| "loss": 0.8338, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.5152346570397112, | |
| "grad_norm": 0.026238281279802322, | |
| "learning_rate": 4.415558097853571e-06, | |
| "loss": 0.8164, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5163898916967509, | |
| "grad_norm": 0.02682877704501152, | |
| "learning_rate": 4.4171792052198945e-06, | |
| "loss": 0.8414, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.5175451263537906, | |
| "grad_norm": 0.02541991136968136, | |
| "learning_rate": 4.418796689996907e-06, | |
| "loss": 0.8043, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5187003610108303, | |
| "grad_norm": 0.026446549221873283, | |
| "learning_rate": 4.420410568338873e-06, | |
| "loss": 0.8499, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.51985559566787, | |
| "grad_norm": 0.02652100659906864, | |
| "learning_rate": 4.42202085629224e-06, | |
| "loss": 0.8331, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5210108303249098, | |
| "grad_norm": 0.025452058762311935, | |
| "learning_rate": 4.423627569796601e-06, | |
| "loss": 0.8265, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.5221660649819495, | |
| "grad_norm": 0.025869838893413544, | |
| "learning_rate": 4.425230724685638e-06, | |
| "loss": 0.8276, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5233212996389892, | |
| "grad_norm": 0.026051780208945274, | |
| "learning_rate": 4.426830336688054e-06, | |
| "loss": 0.8575, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.5244765342960289, | |
| "grad_norm": 0.02754388563334942, | |
| "learning_rate": 4.428426421428507e-06, | |
| "loss": 0.8255, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5256317689530686, | |
| "grad_norm": 0.02607525885105133, | |
| "learning_rate": 4.430018994428521e-06, | |
| "loss": 0.8397, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5267870036101083, | |
| "grad_norm": 0.026636334136128426, | |
| "learning_rate": 4.431608071107392e-06, | |
| "loss": 0.8377, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.527942238267148, | |
| "grad_norm": 0.026849418878555298, | |
| "learning_rate": 4.433193666783084e-06, | |
| "loss": 0.8257, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5290974729241877, | |
| "grad_norm": 0.02537579834461212, | |
| "learning_rate": 4.434775796673116e-06, | |
| "loss": 0.8455, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5302527075812274, | |
| "grad_norm": 0.025962024927139282, | |
| "learning_rate": 4.4363544758954355e-06, | |
| "loss": 0.8263, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5314079422382672, | |
| "grad_norm": 0.027943387627601624, | |
| "learning_rate": 4.437929719469291e-06, | |
| "loss": 0.8546, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5325631768953069, | |
| "grad_norm": 0.02550574764609337, | |
| "learning_rate": 4.439501542316081e-06, | |
| "loss": 0.8367, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5337184115523466, | |
| "grad_norm": 0.030560094863176346, | |
| "learning_rate": 4.44106995926021e-06, | |
| "loss": 0.8443, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5348736462093863, | |
| "grad_norm": 0.025380682200193405, | |
| "learning_rate": 4.442634985029922e-06, | |
| "loss": 0.8341, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.536028880866426, | |
| "grad_norm": 0.027356211096048355, | |
| "learning_rate": 4.444196634258136e-06, | |
| "loss": 0.8427, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5371841155234657, | |
| "grad_norm": 0.027884816750884056, | |
| "learning_rate": 4.445754921483257e-06, | |
| "loss": 0.8441, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5383393501805054, | |
| "grad_norm": 0.025398530066013336, | |
| "learning_rate": 4.44730986115e-06, | |
| "loss": 0.8342, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5394945848375451, | |
| "grad_norm": 0.027301618829369545, | |
| "learning_rate": 4.448861467610187e-06, | |
| "loss": 0.8453, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.5406498194945848, | |
| "grad_norm": 0.025283178314566612, | |
| "learning_rate": 4.450409755123541e-06, | |
| "loss": 0.8468, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5418050541516245, | |
| "grad_norm": 0.025396686047315598, | |
| "learning_rate": 4.4519547378584725e-06, | |
| "loss": 0.8352, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5429602888086643, | |
| "grad_norm": 0.026122555136680603, | |
| "learning_rate": 4.453496429892863e-06, | |
| "loss": 0.8255, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.544115523465704, | |
| "grad_norm": 0.026710988953709602, | |
| "learning_rate": 4.455034845214828e-06, | |
| "loss": 0.8033, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.5452707581227437, | |
| "grad_norm": 0.027530129998922348, | |
| "learning_rate": 4.45656999772348e-06, | |
| "loss": 0.827, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5464259927797834, | |
| "grad_norm": 0.02589859999716282, | |
| "learning_rate": 4.458101901229686e-06, | |
| "loss": 0.8585, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5475812274368231, | |
| "grad_norm": 0.026990870013833046, | |
| "learning_rate": 4.459630569456809e-06, | |
| "loss": 0.8228, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5487364620938628, | |
| "grad_norm": 0.026618408039212227, | |
| "learning_rate": 4.461156016041445e-06, | |
| "loss": 0.8308, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5498916967509025, | |
| "grad_norm": 0.026187585666775703, | |
| "learning_rate": 4.4626782545341565e-06, | |
| "loss": 0.841, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5510469314079423, | |
| "grad_norm": 0.024839429184794426, | |
| "learning_rate": 4.4641972984001906e-06, | |
| "loss": 0.8265, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.552202166064982, | |
| "grad_norm": 0.02818799950182438, | |
| "learning_rate": 4.465713161020199e-06, | |
| "loss": 0.8231, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5533574007220217, | |
| "grad_norm": 0.02686317451298237, | |
| "learning_rate": 4.46722585569094e-06, | |
| "loss": 0.8266, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5545126353790614, | |
| "grad_norm": 0.02528228797018528, | |
| "learning_rate": 4.468735395625979e-06, | |
| "loss": 0.8296, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5556678700361011, | |
| "grad_norm": 0.02689657174050808, | |
| "learning_rate": 4.470241793956387e-06, | |
| "loss": 0.8345, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5568231046931408, | |
| "grad_norm": 0.028286360204219818, | |
| "learning_rate": 4.471745063731417e-06, | |
| "loss": 0.8371, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5579783393501805, | |
| "grad_norm": 0.0263553187251091, | |
| "learning_rate": 4.473245217919187e-06, | |
| "loss": 0.8183, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5591335740072202, | |
| "grad_norm": 0.027919506654143333, | |
| "learning_rate": 4.474742269407355e-06, | |
| "loss": 0.8317, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5602888086642599, | |
| "grad_norm": 0.02882731519639492, | |
| "learning_rate": 4.476236231003773e-06, | |
| "loss": 0.8414, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5614440433212996, | |
| "grad_norm": 0.02561136521399021, | |
| "learning_rate": 4.477727115437156e-06, | |
| "loss": 0.8004, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5625992779783393, | |
| "grad_norm": 0.02664412185549736, | |
| "learning_rate": 4.479214935357724e-06, | |
| "loss": 0.7967, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.563754512635379, | |
| "grad_norm": 0.028001073747873306, | |
| "learning_rate": 4.480699703337852e-06, | |
| "loss": 0.841, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5649097472924187, | |
| "grad_norm": 0.026740211993455887, | |
| "learning_rate": 4.4821814318727016e-06, | |
| "loss": 0.8369, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5660649819494585, | |
| "grad_norm": 0.024850796908140182, | |
| "learning_rate": 4.4836601333808566e-06, | |
| "loss": 0.8405, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5672202166064982, | |
| "grad_norm": 0.025452926754951477, | |
| "learning_rate": 4.485135820204948e-06, | |
| "loss": 0.8106, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.568375451263538, | |
| "grad_norm": 0.026447713375091553, | |
| "learning_rate": 4.4866085046122675e-06, | |
| "loss": 0.7992, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5695306859205777, | |
| "grad_norm": 0.027519937604665756, | |
| "learning_rate": 4.488078198795384e-06, | |
| "loss": 0.8227, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5706859205776174, | |
| "grad_norm": 0.026447774842381477, | |
| "learning_rate": 4.4895449148727455e-06, | |
| "loss": 0.8377, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5718411552346571, | |
| "grad_norm": 0.02749788761138916, | |
| "learning_rate": 4.491008664889282e-06, | |
| "loss": 0.789, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5729963898916968, | |
| "grad_norm": 0.025579964742064476, | |
| "learning_rate": 4.492469460816997e-06, | |
| "loss": 0.8288, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5741516245487365, | |
| "grad_norm": 0.027195453643798828, | |
| "learning_rate": 4.493927314555554e-06, | |
| "loss": 0.8194, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5753068592057762, | |
| "grad_norm": 0.027325714007019997, | |
| "learning_rate": 4.495382237932863e-06, | |
| "loss": 0.8136, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5764620938628159, | |
| "grad_norm": 0.026480497792363167, | |
| "learning_rate": 4.496834242705651e-06, | |
| "loss": 0.8239, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5776173285198556, | |
| "grad_norm": 0.026893138885498047, | |
| "learning_rate": 4.498283340560032e-06, | |
| "loss": 0.8234, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5787725631768953, | |
| "grad_norm": 0.02725539728999138, | |
| "learning_rate": 4.4997295431120764e-06, | |
| "loss": 0.8167, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.579927797833935, | |
| "grad_norm": 0.027873387560248375, | |
| "learning_rate": 4.5011728619083665e-06, | |
| "loss": 0.8012, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5810830324909747, | |
| "grad_norm": 0.027902130037546158, | |
| "learning_rate": 4.502613308426546e-06, | |
| "loss": 0.8248, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5822382671480144, | |
| "grad_norm": 0.027641048654913902, | |
| "learning_rate": 4.504050894075876e-06, | |
| "loss": 0.8298, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5833935018050541, | |
| "grad_norm": 0.026641003787517548, | |
| "learning_rate": 4.5054856301977696e-06, | |
| "loss": 0.8228, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5845487364620938, | |
| "grad_norm": 0.028835156932473183, | |
| "learning_rate": 4.506917528066332e-06, | |
| "loss": 0.8222, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5857039711191335, | |
| "grad_norm": 0.02770557627081871, | |
| "learning_rate": 4.5083465988888945e-06, | |
| "loss": 0.8455, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5868592057761732, | |
| "grad_norm": 0.0266465675085783, | |
| "learning_rate": 4.509772853806533e-06, | |
| "loss": 0.8269, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.588014440433213, | |
| "grad_norm": 0.029550906270742416, | |
| "learning_rate": 4.511196303894598e-06, | |
| "loss": 0.817, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5891696750902528, | |
| "grad_norm": 0.025802530348300934, | |
| "learning_rate": 4.512616960163228e-06, | |
| "loss": 0.8352, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5903249097472925, | |
| "grad_norm": 0.02622455172240734, | |
| "learning_rate": 4.514034833557855e-06, | |
| "loss": 0.8209, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5914801444043322, | |
| "grad_norm": 0.028669588267803192, | |
| "learning_rate": 4.515449934959719e-06, | |
| "loss": 0.8199, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5926353790613719, | |
| "grad_norm": 0.02638787031173706, | |
| "learning_rate": 4.516862275186361e-06, | |
| "loss": 0.8396, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5937906137184116, | |
| "grad_norm": 0.025254135951399803, | |
| "learning_rate": 4.518271864992127e-06, | |
| "loss": 0.8314, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5949458483754513, | |
| "grad_norm": 0.026823000982403755, | |
| "learning_rate": 4.519678715068652e-06, | |
| "loss": 0.8063, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.596101083032491, | |
| "grad_norm": 0.029466571286320686, | |
| "learning_rate": 4.521082836045353e-06, | |
| "loss": 0.8279, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5972563176895307, | |
| "grad_norm": 0.02610476315021515, | |
| "learning_rate": 4.5224842384899045e-06, | |
| "loss": 0.8217, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5984115523465704, | |
| "grad_norm": 0.027522264048457146, | |
| "learning_rate": 4.523882932908722e-06, | |
| "loss": 0.8465, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5995667870036101, | |
| "grad_norm": 0.025692617520689964, | |
| "learning_rate": 4.52527892974743e-06, | |
| "loss": 0.8448, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.6007220216606498, | |
| "grad_norm": 0.027035709470510483, | |
| "learning_rate": 4.526672239391333e-06, | |
| "loss": 0.8216, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6018772563176895, | |
| "grad_norm": 0.026100315153598785, | |
| "learning_rate": 4.528062872165875e-06, | |
| "loss": 0.8007, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.6030324909747292, | |
| "grad_norm": 0.025961345061659813, | |
| "learning_rate": 4.529450838337104e-06, | |
| "loss": 0.8185, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6041877256317689, | |
| "grad_norm": 0.02683369629085064, | |
| "learning_rate": 4.5308361481121244e-06, | |
| "loss": 0.8377, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.6053429602888086, | |
| "grad_norm": 0.02755955420434475, | |
| "learning_rate": 4.532218811639545e-06, | |
| "loss": 0.8197, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6064981949458483, | |
| "grad_norm": 0.02744685672223568, | |
| "learning_rate": 4.533598839009929e-06, | |
| "loss": 0.8146, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.607653429602888, | |
| "grad_norm": 0.026420993730425835, | |
| "learning_rate": 4.5349762402562325e-06, | |
| "loss": 0.8324, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6088086642599277, | |
| "grad_norm": 0.026225924491882324, | |
| "learning_rate": 4.5363510253542444e-06, | |
| "loss": 0.8299, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.6099638989169676, | |
| "grad_norm": 0.025415120646357536, | |
| "learning_rate": 4.537723204223021e-06, | |
| "loss": 0.8373, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6111191335740073, | |
| "grad_norm": 0.026079954579472542, | |
| "learning_rate": 4.53909278672531e-06, | |
| "loss": 0.8064, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.612274368231047, | |
| "grad_norm": 0.027824481949210167, | |
| "learning_rate": 4.540459782667983e-06, | |
| "loss": 0.834, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6134296028880867, | |
| "grad_norm": 0.02630164660513401, | |
| "learning_rate": 4.541824201802449e-06, | |
| "loss": 0.8341, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.6145848375451264, | |
| "grad_norm": 0.025940794497728348, | |
| "learning_rate": 4.543186053825081e-06, | |
| "loss": 0.8356, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.6157400722021661, | |
| "grad_norm": 0.025478770956397057, | |
| "learning_rate": 4.544545348377621e-06, | |
| "loss": 0.8118, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.6168953068592058, | |
| "grad_norm": 0.02738497406244278, | |
| "learning_rate": 4.545902095047594e-06, | |
| "loss": 0.8397, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6180505415162455, | |
| "grad_norm": 0.025361735373735428, | |
| "learning_rate": 4.547256303368714e-06, | |
| "loss": 0.8422, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.6192057761732852, | |
| "grad_norm": 0.02575971558690071, | |
| "learning_rate": 4.5486079828212835e-06, | |
| "loss": 0.8224, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6203610108303249, | |
| "grad_norm": 0.026920614764094353, | |
| "learning_rate": 4.549957142832593e-06, | |
| "loss": 0.7988, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.6215162454873646, | |
| "grad_norm": 0.026717036962509155, | |
| "learning_rate": 4.5513037927773155e-06, | |
| "loss": 0.8397, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6226714801444043, | |
| "grad_norm": 0.028026578947901726, | |
| "learning_rate": 4.552647941977898e-06, | |
| "loss": 0.8421, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.623826714801444, | |
| "grad_norm": 0.02701553702354431, | |
| "learning_rate": 4.553989599704948e-06, | |
| "loss": 0.8129, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6249819494584837, | |
| "grad_norm": 0.028897127136588097, | |
| "learning_rate": 4.555328775177616e-06, | |
| "loss": 0.8049, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.6261371841155234, | |
| "grad_norm": 0.027024636045098305, | |
| "learning_rate": 4.556665477563979e-06, | |
| "loss": 0.8255, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6272924187725631, | |
| "grad_norm": 0.025836756452918053, | |
| "learning_rate": 4.557999715981412e-06, | |
| "loss": 0.8172, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.6284476534296028, | |
| "grad_norm": 0.027048436924815178, | |
| "learning_rate": 4.559331499496967e-06, | |
| "loss": 0.8336, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6296028880866426, | |
| "grad_norm": 0.02669835276901722, | |
| "learning_rate": 4.560660837127738e-06, | |
| "loss": 0.7958, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.6307581227436823, | |
| "grad_norm": 0.02850779891014099, | |
| "learning_rate": 4.5619877378412295e-06, | |
| "loss": 0.8313, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6319133574007221, | |
| "grad_norm": 0.027087301015853882, | |
| "learning_rate": 4.563312210555719e-06, | |
| "loss": 0.8257, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.6330685920577618, | |
| "grad_norm": 0.029034119099378586, | |
| "learning_rate": 4.564634264140616e-06, | |
| "loss": 0.8292, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6342238267148015, | |
| "grad_norm": 0.02700372040271759, | |
| "learning_rate": 4.5659539074168204e-06, | |
| "loss": 0.828, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.6353790613718412, | |
| "grad_norm": 0.027077239006757736, | |
| "learning_rate": 4.5672711491570735e-06, | |
| "loss": 0.829, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6365342960288809, | |
| "grad_norm": 0.027820030227303505, | |
| "learning_rate": 4.568585998086309e-06, | |
| "loss": 0.8323, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6376895306859206, | |
| "grad_norm": 0.02544805407524109, | |
| "learning_rate": 4.569898462881998e-06, | |
| "loss": 0.8424, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6388447653429603, | |
| "grad_norm": 0.025698795914649963, | |
| "learning_rate": 4.571208552174497e-06, | |
| "loss": 0.8471, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.026754576712846756, | |
| "learning_rate": 4.572516274547383e-06, | |
| "loss": 0.8277, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6411552346570397, | |
| "grad_norm": 0.026160219684243202, | |
| "learning_rate": 4.573821638537794e-06, | |
| "loss": 0.8458, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6423104693140794, | |
| "grad_norm": 0.02805655263364315, | |
| "learning_rate": 4.5751246526367635e-06, | |
| "loss": 0.8196, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6434657039711191, | |
| "grad_norm": 0.02578035369515419, | |
| "learning_rate": 4.576425325289549e-06, | |
| "loss": 0.8235, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6446209386281588, | |
| "grad_norm": 0.02676086314022541, | |
| "learning_rate": 4.577723664895965e-06, | |
| "loss": 0.7819, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6457761732851985, | |
| "grad_norm": 0.02770860493183136, | |
| "learning_rate": 4.579019679810706e-06, | |
| "loss": 0.8063, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6469314079422382, | |
| "grad_norm": 0.025705819949507713, | |
| "learning_rate": 4.5803133783436676e-06, | |
| "loss": 0.8145, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.648086642599278, | |
| "grad_norm": 0.025856079533696175, | |
| "learning_rate": 4.5816047687602695e-06, | |
| "loss": 0.7849, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6492418772563177, | |
| "grad_norm": 0.026201602071523666, | |
| "learning_rate": 4.582893859281769e-06, | |
| "loss": 0.8262, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6503971119133574, | |
| "grad_norm": 0.026211684569716454, | |
| "learning_rate": 4.584180658085578e-06, | |
| "loss": 0.8371, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.6515523465703971, | |
| "grad_norm": 0.02780756726861, | |
| "learning_rate": 4.585465173305571e-06, | |
| "loss": 0.8258, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6527075812274368, | |
| "grad_norm": 0.02710585482418537, | |
| "learning_rate": 4.586747413032399e-06, | |
| "loss": 0.8289, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.6538628158844766, | |
| "grad_norm": 0.02654946967959404, | |
| "learning_rate": 4.588027385313786e-06, | |
| "loss": 0.8208, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6550180505415163, | |
| "grad_norm": 0.025670049712061882, | |
| "learning_rate": 4.589305098154845e-06, | |
| "loss": 0.8236, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.656173285198556, | |
| "grad_norm": 0.025593513622879982, | |
| "learning_rate": 4.5905805595183656e-06, | |
| "loss": 0.8397, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6573285198555957, | |
| "grad_norm": 0.02713758684694767, | |
| "learning_rate": 4.59185377732512e-06, | |
| "loss": 0.8077, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6584837545126354, | |
| "grad_norm": 0.025683598592877388, | |
| "learning_rate": 4.5931247594541535e-06, | |
| "loss": 0.828, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6596389891696751, | |
| "grad_norm": 0.027867255732417107, | |
| "learning_rate": 4.594393513743081e-06, | |
| "loss": 0.8053, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6607942238267148, | |
| "grad_norm": 0.02871118299663067, | |
| "learning_rate": 4.595660047988374e-06, | |
| "loss": 0.8308, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6619494584837545, | |
| "grad_norm": 0.02591397613286972, | |
| "learning_rate": 4.59692436994565e-06, | |
| "loss": 0.8376, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6631046931407942, | |
| "grad_norm": 0.02970442920923233, | |
| "learning_rate": 4.598186487329957e-06, | |
| "loss": 0.8343, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6642599277978339, | |
| "grad_norm": 0.027048716321587563, | |
| "learning_rate": 4.599446407816052e-06, | |
| "loss": 0.8169, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6654151624548736, | |
| "grad_norm": 0.027099261060357094, | |
| "learning_rate": 4.600704139038687e-06, | |
| "loss": 0.8414, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6665703971119133, | |
| "grad_norm": 0.02764103189110756, | |
| "learning_rate": 4.601959688592886e-06, | |
| "loss": 0.8144, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.667725631768953, | |
| "grad_norm": 0.025973627343773842, | |
| "learning_rate": 4.603213064034216e-06, | |
| "loss": 0.8086, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6688808664259928, | |
| "grad_norm": 0.02816241979598999, | |
| "learning_rate": 4.6044642728790615e-06, | |
| "loss": 0.8266, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6700361010830325, | |
| "grad_norm": 0.02614632435142994, | |
| "learning_rate": 4.605713322604896e-06, | |
| "loss": 0.8365, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6711913357400722, | |
| "grad_norm": 0.02658606320619583, | |
| "learning_rate": 4.606960220650551e-06, | |
| "loss": 0.8273, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6723465703971119, | |
| "grad_norm": 0.02635134756565094, | |
| "learning_rate": 4.608204974416482e-06, | |
| "loss": 0.8169, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6735018050541516, | |
| "grad_norm": 0.025622064247727394, | |
| "learning_rate": 4.609447591265024e-06, | |
| "loss": 0.8256, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.6746570397111913, | |
| "grad_norm": 0.02630884386599064, | |
| "learning_rate": 4.610688078520667e-06, | |
| "loss": 0.8546, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.675812274368231, | |
| "grad_norm": 0.026515616104006767, | |
| "learning_rate": 4.611926443470302e-06, | |
| "loss": 0.829, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6769675090252708, | |
| "grad_norm": 0.025966297835111618, | |
| "learning_rate": 4.613162693363485e-06, | |
| "loss": 0.8206, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6781227436823105, | |
| "grad_norm": 0.025645708665251732, | |
| "learning_rate": 4.614396835412691e-06, | |
| "loss": 0.8214, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6792779783393502, | |
| "grad_norm": 0.026266956701874733, | |
| "learning_rate": 4.6156288767935645e-06, | |
| "loss": 0.8128, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6804332129963899, | |
| "grad_norm": 0.027987468987703323, | |
| "learning_rate": 4.61685882464517e-06, | |
| "loss": 0.8269, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6815884476534296, | |
| "grad_norm": 0.02621353790163994, | |
| "learning_rate": 4.61808668607024e-06, | |
| "loss": 0.817, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6827436823104693, | |
| "grad_norm": 0.026081636548042297, | |
| "learning_rate": 4.619312468135426e-06, | |
| "loss": 0.8168, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.683898916967509, | |
| "grad_norm": 0.026946187019348145, | |
| "learning_rate": 4.620536177871534e-06, | |
| "loss": 0.8208, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6850541516245487, | |
| "grad_norm": 0.02429202012717724, | |
| "learning_rate": 4.621757822273772e-06, | |
| "loss": 0.8032, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6862093862815885, | |
| "grad_norm": 0.027351096272468567, | |
| "learning_rate": 4.62297740830199e-06, | |
| "loss": 0.8234, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6873646209386282, | |
| "grad_norm": 0.026615051552653313, | |
| "learning_rate": 4.624194942880917e-06, | |
| "loss": 0.8205, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6885198555956679, | |
| "grad_norm": 0.025758862495422363, | |
| "learning_rate": 4.625410432900395e-06, | |
| "loss": 0.8372, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6896750902527076, | |
| "grad_norm": 0.026305649429559708, | |
| "learning_rate": 4.626623885215616e-06, | |
| "loss": 0.813, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6908303249097473, | |
| "grad_norm": 0.029083121567964554, | |
| "learning_rate": 4.627835306647352e-06, | |
| "loss": 0.801, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.691985559566787, | |
| "grad_norm": 0.02733645774424076, | |
| "learning_rate": 4.6290447039821865e-06, | |
| "loss": 0.818, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6931407942238267, | |
| "grad_norm": 0.026958592236042023, | |
| "learning_rate": 4.63025208397274e-06, | |
| "loss": 0.8189, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6942960288808664, | |
| "grad_norm": 0.02624497003853321, | |
| "learning_rate": 4.631457453337899e-06, | |
| "loss": 0.8217, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6954512635379061, | |
| "grad_norm": 0.026590224355459213, | |
| "learning_rate": 4.632660818763041e-06, | |
| "loss": 0.8413, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.6966064981949458, | |
| "grad_norm": 0.029455525800585747, | |
| "learning_rate": 4.633862186900253e-06, | |
| "loss": 0.8437, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6977617328519855, | |
| "grad_norm": 0.026593301445245743, | |
| "learning_rate": 4.635061564368554e-06, | |
| "loss": 0.8396, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6989169675090253, | |
| "grad_norm": 0.026324933394789696, | |
| "learning_rate": 4.636258957754115e-06, | |
| "loss": 0.828, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.700072202166065, | |
| "grad_norm": 0.027765916660428047, | |
| "learning_rate": 4.6374543736104774e-06, | |
| "loss": 0.8403, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7012274368231047, | |
| "grad_norm": 0.026490546762943268, | |
| "learning_rate": 4.638647818458763e-06, | |
| "loss": 0.8206, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.7023826714801444, | |
| "grad_norm": 0.028683004900813103, | |
| "learning_rate": 4.639839298787892e-06, | |
| "loss": 0.8203, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7035379061371841, | |
| "grad_norm": 0.026209495961666107, | |
| "learning_rate": 4.641028821054792e-06, | |
| "loss": 0.8127, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.7046931407942238, | |
| "grad_norm": 0.02771477773785591, | |
| "learning_rate": 4.642216391684613e-06, | |
| "loss": 0.8218, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7058483754512636, | |
| "grad_norm": 0.02859386056661606, | |
| "learning_rate": 4.643402017070924e-06, | |
| "loss": 0.7877, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.7070036101083033, | |
| "grad_norm": 0.026568232104182243, | |
| "learning_rate": 4.644585703575936e-06, | |
| "loss": 0.8062, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.708158844765343, | |
| "grad_norm": 0.030504100024700165, | |
| "learning_rate": 4.645767457530693e-06, | |
| "loss": 0.8068, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.7093140794223827, | |
| "grad_norm": 0.026128064841032028, | |
| "learning_rate": 4.64694728523528e-06, | |
| "loss": 0.8185, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7104693140794224, | |
| "grad_norm": 0.026173869147896767, | |
| "learning_rate": 4.648125192959028e-06, | |
| "loss": 0.8305, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.7116245487364621, | |
| "grad_norm": 0.02751108445227146, | |
| "learning_rate": 4.649301186940709e-06, | |
| "loss": 0.8176, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7127797833935018, | |
| "grad_norm": 0.026463057845830917, | |
| "learning_rate": 4.650475273388737e-06, | |
| "loss": 0.8011, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.7139350180505415, | |
| "grad_norm": 0.026889406144618988, | |
| "learning_rate": 4.65164745848136e-06, | |
| "loss": 0.8319, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.7150902527075812, | |
| "grad_norm": 0.02862033247947693, | |
| "learning_rate": 4.652817748366864e-06, | |
| "loss": 0.8394, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.7162454873646209, | |
| "grad_norm": 0.025286676362156868, | |
| "learning_rate": 4.653986149163757e-06, | |
| "loss": 0.825, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7174007220216606, | |
| "grad_norm": 0.026653330773115158, | |
| "learning_rate": 4.655152666960967e-06, | |
| "loss": 0.7891, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.7185559566787003, | |
| "grad_norm": 0.027324773371219635, | |
| "learning_rate": 4.656317307818032e-06, | |
| "loss": 0.8105, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.71971119133574, | |
| "grad_norm": 0.026749735698103905, | |
| "learning_rate": 4.657480077765284e-06, | |
| "loss": 0.8125, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.7208664259927798, | |
| "grad_norm": 0.027720510959625244, | |
| "learning_rate": 4.6586409828040405e-06, | |
| "loss": 0.8385, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7220216606498195, | |
| "grad_norm": 0.02868843637406826, | |
| "learning_rate": 4.659800028906792e-06, | |
| "loss": 0.8221, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.7231768953068592, | |
| "grad_norm": 0.026634184643626213, | |
| "learning_rate": 4.660957222017384e-06, | |
| "loss": 0.824, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.724332129963899, | |
| "grad_norm": 0.02601156197488308, | |
| "learning_rate": 4.662112568051194e-06, | |
| "loss": 0.813, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.7254873646209387, | |
| "grad_norm": 0.028468729928135872, | |
| "learning_rate": 4.663266072895327e-06, | |
| "loss": 0.8229, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7266425992779784, | |
| "grad_norm": 0.026884114369750023, | |
| "learning_rate": 4.664417742408782e-06, | |
| "loss": 0.8201, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.7277978339350181, | |
| "grad_norm": 0.02819686383008957, | |
| "learning_rate": 4.6655675824226375e-06, | |
| "loss": 0.8048, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7289530685920578, | |
| "grad_norm": 0.02609233930706978, | |
| "learning_rate": 4.666715598740224e-06, | |
| "loss": 0.8145, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.7301083032490975, | |
| "grad_norm": 0.025876475498080254, | |
| "learning_rate": 4.667861797137309e-06, | |
| "loss": 0.8236, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7312635379061372, | |
| "grad_norm": 0.025990156456828117, | |
| "learning_rate": 4.669006183362259e-06, | |
| "loss": 0.8416, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.7324187725631769, | |
| "grad_norm": 0.026048380881547928, | |
| "learning_rate": 4.670148763136221e-06, | |
| "loss": 0.8004, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7335740072202166, | |
| "grad_norm": 0.0267790500074625, | |
| "learning_rate": 4.6712895421532935e-06, | |
| "loss": 0.8249, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.7347292418772563, | |
| "grad_norm": 0.027045415714383125, | |
| "learning_rate": 4.672428526080691e-06, | |
| "loss": 0.8346, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.735884476534296, | |
| "grad_norm": 0.026687374338507652, | |
| "learning_rate": 4.673565720558918e-06, | |
| "loss": 0.8206, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.7370397111913357, | |
| "grad_norm": 0.025013815611600876, | |
| "learning_rate": 4.674701131201937e-06, | |
| "loss": 0.8278, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7381949458483754, | |
| "grad_norm": 0.02536069042980671, | |
| "learning_rate": 4.675834763597334e-06, | |
| "loss": 0.8051, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.7393501805054151, | |
| "grad_norm": 0.025835523381829262, | |
| "learning_rate": 4.676966623306479e-06, | |
| "loss": 0.8131, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7405054151624548, | |
| "grad_norm": 0.026328476145863533, | |
| "learning_rate": 4.678096715864696e-06, | |
| "loss": 0.834, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.7416606498194945, | |
| "grad_norm": 0.027017908170819283, | |
| "learning_rate": 4.679225046781422e-06, | |
| "loss": 0.808, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7428158844765343, | |
| "grad_norm": 0.026868514716625214, | |
| "learning_rate": 4.6803516215403704e-06, | |
| "loss": 0.8236, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.743971119133574, | |
| "grad_norm": 0.026412272825837135, | |
| "learning_rate": 4.6814764455996875e-06, | |
| "loss": 0.8343, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7451263537906138, | |
| "grad_norm": 0.025585437193512917, | |
| "learning_rate": 4.682599524392114e-06, | |
| "loss": 0.8165, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.7462815884476535, | |
| "grad_norm": 0.026194840669631958, | |
| "learning_rate": 4.683720863325141e-06, | |
| "loss": 0.8284, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7474368231046932, | |
| "grad_norm": 0.025945279747247696, | |
| "learning_rate": 4.6848404677811685e-06, | |
| "loss": 0.8213, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.7485920577617329, | |
| "grad_norm": 0.02698802575469017, | |
| "learning_rate": 4.685958343117656e-06, | |
| "loss": 0.8399, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7497472924187726, | |
| "grad_norm": 0.02705751173198223, | |
| "learning_rate": 4.687074494667283e-06, | |
| "loss": 0.8254, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.7509025270758123, | |
| "grad_norm": 0.02601143904030323, | |
| "learning_rate": 4.688188927738093e-06, | |
| "loss": 0.8283, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.752057761732852, | |
| "grad_norm": 0.026112712919712067, | |
| "learning_rate": 4.689301647613653e-06, | |
| "loss": 0.8334, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.7532129963898917, | |
| "grad_norm": 0.02763291820883751, | |
| "learning_rate": 4.690412659553201e-06, | |
| "loss": 0.8195, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7543682310469314, | |
| "grad_norm": 0.02655601128935814, | |
| "learning_rate": 4.69152196879179e-06, | |
| "loss": 0.8164, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7555234657039711, | |
| "grad_norm": 0.024583589285612106, | |
| "learning_rate": 4.692629580540446e-06, | |
| "loss": 0.8112, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7566787003610108, | |
| "grad_norm": 0.02777348831295967, | |
| "learning_rate": 4.693735499986305e-06, | |
| "loss": 0.8228, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7578339350180505, | |
| "grad_norm": 0.025867123156785965, | |
| "learning_rate": 4.694839732292768e-06, | |
| "loss": 0.8169, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7589891696750902, | |
| "grad_norm": 0.02569735422730446, | |
| "learning_rate": 4.695942282599635e-06, | |
| "loss": 0.8274, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7601444043321299, | |
| "grad_norm": 0.026997772976756096, | |
| "learning_rate": 4.69704315602326e-06, | |
| "loss": 0.8127, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7612996389891696, | |
| "grad_norm": 0.026205215603113174, | |
| "learning_rate": 4.698142357656684e-06, | |
| "loss": 0.8053, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.7624548736462093, | |
| "grad_norm": 0.026929359883069992, | |
| "learning_rate": 4.699239892569782e-06, | |
| "loss": 0.8049, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.763610108303249, | |
| "grad_norm": 0.02786414325237274, | |
| "learning_rate": 4.700335765809401e-06, | |
| "loss": 0.8098, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7647653429602889, | |
| "grad_norm": 0.02785409241914749, | |
| "learning_rate": 4.701429982399501e-06, | |
| "loss": 0.8203, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7659205776173286, | |
| "grad_norm": 0.02913859114050865, | |
| "learning_rate": 4.702522547341289e-06, | |
| "loss": 0.8174, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7670758122743683, | |
| "grad_norm": 0.025960449129343033, | |
| "learning_rate": 4.703613465613363e-06, | |
| "loss": 0.8111, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.768231046931408, | |
| "grad_norm": 0.027514390647411346, | |
| "learning_rate": 4.704702742171842e-06, | |
| "loss": 0.7999, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7693862815884477, | |
| "grad_norm": 0.026732420548796654, | |
| "learning_rate": 4.705790381950503e-06, | |
| "loss": 0.8332, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7705415162454874, | |
| "grad_norm": 0.025522086769342422, | |
| "learning_rate": 4.706876389860915e-06, | |
| "loss": 0.8179, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7716967509025271, | |
| "grad_norm": 0.026885733008384705, | |
| "learning_rate": 4.707960770792576e-06, | |
| "loss": 0.7875, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7728519855595668, | |
| "grad_norm": 0.026839058846235275, | |
| "learning_rate": 4.709043529613039e-06, | |
| "loss": 0.8029, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.7740072202166065, | |
| "grad_norm": 0.02562631480395794, | |
| "learning_rate": 4.710124671168044e-06, | |
| "loss": 0.8184, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7751624548736462, | |
| "grad_norm": 0.026263324543833733, | |
| "learning_rate": 4.7112042002816544e-06, | |
| "loss": 0.8192, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7763176895306859, | |
| "grad_norm": 0.026700858026742935, | |
| "learning_rate": 4.7122821217563755e-06, | |
| "loss": 0.827, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7774729241877256, | |
| "grad_norm": 0.02566208317875862, | |
| "learning_rate": 4.7133584403732955e-06, | |
| "loss": 0.8072, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7786281588447653, | |
| "grad_norm": 0.02590363658964634, | |
| "learning_rate": 4.7144331608922e-06, | |
| "loss": 0.8111, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.779783393501805, | |
| "grad_norm": 0.025736572220921516, | |
| "learning_rate": 4.715506288051709e-06, | |
| "loss": 0.8077, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7809386281588447, | |
| "grad_norm": 0.027111530303955078, | |
| "learning_rate": 4.716577826569394e-06, | |
| "loss": 0.8429, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7820938628158844, | |
| "grad_norm": 0.02599494345486164, | |
| "learning_rate": 4.717647781141908e-06, | |
| "loss": 0.8125, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.7832490974729241, | |
| "grad_norm": 0.02675134316086769, | |
| "learning_rate": 4.7187161564451065e-06, | |
| "loss": 0.8131, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7844043321299639, | |
| "grad_norm": 0.026113273575901985, | |
| "learning_rate": 4.71978295713417e-06, | |
| "loss": 0.8292, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7855595667870036, | |
| "grad_norm": 0.024617226794362068, | |
| "learning_rate": 4.720848187843727e-06, | |
| "loss": 0.8032, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7867148014440434, | |
| "grad_norm": 0.02645159512758255, | |
| "learning_rate": 4.721911853187975e-06, | |
| "loss": 0.8287, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7878700361010831, | |
| "grad_norm": 0.026287630200386047, | |
| "learning_rate": 4.722973957760799e-06, | |
| "loss": 0.8098, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7890252707581228, | |
| "grad_norm": 0.02640565298497677, | |
| "learning_rate": 4.724034506135888e-06, | |
| "loss": 0.8303, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7901805054151625, | |
| "grad_norm": 0.027293162420392036, | |
| "learning_rate": 4.725093502866861e-06, | |
| "loss": 0.7875, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7913357400722022, | |
| "grad_norm": 0.02671169303357601, | |
| "learning_rate": 4.7261509524873765e-06, | |
| "loss": 0.7986, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7924909747292419, | |
| "grad_norm": 0.025389740243554115, | |
| "learning_rate": 4.727206859511253e-06, | |
| "loss": 0.8168, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7936462093862816, | |
| "grad_norm": 0.02669006958603859, | |
| "learning_rate": 4.7282612284325845e-06, | |
| "loss": 0.8061, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7948014440433213, | |
| "grad_norm": 0.027196478098630905, | |
| "learning_rate": 4.729314063725853e-06, | |
| "loss": 0.8077, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.795956678700361, | |
| "grad_norm": 0.026497790589928627, | |
| "learning_rate": 4.730365369846044e-06, | |
| "loss": 0.8161, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7971119133574007, | |
| "grad_norm": 0.025808461010456085, | |
| "learning_rate": 4.73141515122876e-06, | |
| "loss": 0.8042, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7982671480144404, | |
| "grad_norm": 0.02556486800312996, | |
| "learning_rate": 4.732463412290331e-06, | |
| "loss": 0.7871, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7994223826714801, | |
| "grad_norm": 0.02653394266963005, | |
| "learning_rate": 4.73351015742793e-06, | |
| "loss": 0.8163, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8005776173285198, | |
| "grad_norm": 0.027373354882001877, | |
| "learning_rate": 4.734555391019678e-06, | |
| "loss": 0.8073, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.8017328519855595, | |
| "grad_norm": 0.02534923329949379, | |
| "learning_rate": 4.735599117424759e-06, | |
| "loss": 0.8186, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8028880866425993, | |
| "grad_norm": 0.025402942672371864, | |
| "learning_rate": 4.736641340983524e-06, | |
| "loss": 0.8198, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.804043321299639, | |
| "grad_norm": 0.025877540931105614, | |
| "learning_rate": 4.737682066017604e-06, | |
| "loss": 0.7954, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8051985559566787, | |
| "grad_norm": 0.027829859405755997, | |
| "learning_rate": 4.738721296830016e-06, | |
| "loss": 0.8197, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.8063537906137184, | |
| "grad_norm": 0.02695806324481964, | |
| "learning_rate": 4.739759037705269e-06, | |
| "loss": 0.8361, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8075090252707581, | |
| "grad_norm": 0.02655518613755703, | |
| "learning_rate": 4.740795292909469e-06, | |
| "loss": 0.8055, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.8086642599277978, | |
| "grad_norm": 0.02709355391561985, | |
| "learning_rate": 4.741830066690428e-06, | |
| "loss": 0.7907, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8098194945848376, | |
| "grad_norm": 0.026433579623699188, | |
| "learning_rate": 4.742863363277765e-06, | |
| "loss": 0.8235, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.8109747292418773, | |
| "grad_norm": 0.025645030662417412, | |
| "learning_rate": 4.743895186883009e-06, | |
| "loss": 0.8076, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.812129963898917, | |
| "grad_norm": 0.027833297848701477, | |
| "learning_rate": 4.744925541699707e-06, | |
| "loss": 0.8316, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.8132851985559567, | |
| "grad_norm": 0.025668229907751083, | |
| "learning_rate": 4.74595443190352e-06, | |
| "loss": 0.8075, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8144404332129964, | |
| "grad_norm": 0.026288649067282677, | |
| "learning_rate": 4.746981861652332e-06, | |
| "loss": 0.8421, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.8155956678700361, | |
| "grad_norm": 0.024897335097193718, | |
| "learning_rate": 4.7480078350863404e-06, | |
| "loss": 0.8132, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8167509025270758, | |
| "grad_norm": 0.0259522944688797, | |
| "learning_rate": 4.749032356328167e-06, | |
| "loss": 0.8326, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.8179061371841155, | |
| "grad_norm": 0.026102952659130096, | |
| "learning_rate": 4.750055429482949e-06, | |
| "loss": 0.8118, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8190613718411552, | |
| "grad_norm": 0.025872284546494484, | |
| "learning_rate": 4.751077058638445e-06, | |
| "loss": 0.816, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.8202166064981949, | |
| "grad_norm": 0.025316176936030388, | |
| "learning_rate": 4.752097247865126e-06, | |
| "loss": 0.8061, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8213718411552346, | |
| "grad_norm": 0.02691754698753357, | |
| "learning_rate": 4.753116001216277e-06, | |
| "loss": 0.7946, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.8225270758122744, | |
| "grad_norm": 0.027329301461577415, | |
| "learning_rate": 4.754133322728095e-06, | |
| "loss": 0.8379, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8236823104693141, | |
| "grad_norm": 0.026258062571287155, | |
| "learning_rate": 4.755149216419776e-06, | |
| "loss": 0.8101, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.8248375451263538, | |
| "grad_norm": 0.02672073245048523, | |
| "learning_rate": 4.756163686293624e-06, | |
| "loss": 0.8225, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8259927797833935, | |
| "grad_norm": 0.028196029365062714, | |
| "learning_rate": 4.757176736335135e-06, | |
| "loss": 0.848, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.8271480144404332, | |
| "grad_norm": 0.02594706416130066, | |
| "learning_rate": 4.758188370513093e-06, | |
| "loss": 0.8426, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8283032490974729, | |
| "grad_norm": 0.028120605275034904, | |
| "learning_rate": 4.759198592779668e-06, | |
| "loss": 0.8306, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.8294584837545126, | |
| "grad_norm": 0.027138734236359596, | |
| "learning_rate": 4.760207407070501e-06, | |
| "loss": 0.825, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8306137184115523, | |
| "grad_norm": 0.027197662740945816, | |
| "learning_rate": 4.761214817304805e-06, | |
| "loss": 0.8071, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.8317689530685921, | |
| "grad_norm": 0.026295404881238937, | |
| "learning_rate": 4.7622208273854484e-06, | |
| "loss": 0.8158, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8329241877256318, | |
| "grad_norm": 0.02767680026590824, | |
| "learning_rate": 4.763225441199049e-06, | |
| "loss": 0.8226, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.8340794223826715, | |
| "grad_norm": 0.025539277121424675, | |
| "learning_rate": 4.764228662616066e-06, | |
| "loss": 0.8025, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.8352346570397112, | |
| "grad_norm": 0.02579374797642231, | |
| "learning_rate": 4.765230495490885e-06, | |
| "loss": 0.842, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.8363898916967509, | |
| "grad_norm": 0.025891058146953583, | |
| "learning_rate": 4.766230943661912e-06, | |
| "loss": 0.8135, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8375451263537906, | |
| "grad_norm": 0.02527502365410328, | |
| "learning_rate": 4.767230010951657e-06, | |
| "loss": 0.8087, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.8387003610108303, | |
| "grad_norm": 0.02618500031530857, | |
| "learning_rate": 4.768227701166823e-06, | |
| "loss": 0.8242, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.83985559566787, | |
| "grad_norm": 0.026595618575811386, | |
| "learning_rate": 4.769224018098397e-06, | |
| "loss": 0.8418, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.8410108303249098, | |
| "grad_norm": 0.02652081847190857, | |
| "learning_rate": 4.770218965521729e-06, | |
| "loss": 0.8013, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8421660649819495, | |
| "grad_norm": 0.026961620897054672, | |
| "learning_rate": 4.771212547196624e-06, | |
| "loss": 0.7984, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.8433212996389892, | |
| "grad_norm": 0.024961085990071297, | |
| "learning_rate": 4.772204766867427e-06, | |
| "loss": 0.809, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8444765342960289, | |
| "grad_norm": 0.026465538889169693, | |
| "learning_rate": 4.773195628263101e-06, | |
| "loss": 0.8186, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.8456317689530686, | |
| "grad_norm": 0.025633295997977257, | |
| "learning_rate": 4.77418513509732e-06, | |
| "loss": 0.8111, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8467870036101083, | |
| "grad_norm": 0.026612253859639168, | |
| "learning_rate": 4.775173291068547e-06, | |
| "loss": 0.8218, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.847942238267148, | |
| "grad_norm": 0.026292981579899788, | |
| "learning_rate": 4.776160099860117e-06, | |
| "loss": 0.8107, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8490974729241877, | |
| "grad_norm": 0.026987800374627113, | |
| "learning_rate": 4.777145565140325e-06, | |
| "loss": 0.8088, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8502527075812274, | |
| "grad_norm": 0.027124376967549324, | |
| "learning_rate": 4.778129690562499e-06, | |
| "loss": 0.7977, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8514079422382671, | |
| "grad_norm": 0.025853095576167107, | |
| "learning_rate": 4.7791124797650865e-06, | |
| "loss": 0.8105, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.8525631768953068, | |
| "grad_norm": 0.027579553425312042, | |
| "learning_rate": 4.780093936371737e-06, | |
| "loss": 0.825, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8537184115523466, | |
| "grad_norm": 0.027602966874837875, | |
| "learning_rate": 4.781074063991377e-06, | |
| "loss": 0.8088, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8548736462093863, | |
| "grad_norm": 0.025124140083789825, | |
| "learning_rate": 4.782052866218294e-06, | |
| "loss": 0.7909, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.856028880866426, | |
| "grad_norm": 0.028426257893443108, | |
| "learning_rate": 4.783030346632214e-06, | |
| "loss": 0.8036, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.8571841155234657, | |
| "grad_norm": 0.026561887934803963, | |
| "learning_rate": 4.784006508798379e-06, | |
| "loss": 0.8118, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.8583393501805054, | |
| "grad_norm": 0.027730902656912804, | |
| "learning_rate": 4.7849813562676265e-06, | |
| "loss": 0.8319, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8594945848375452, | |
| "grad_norm": 0.02669842354953289, | |
| "learning_rate": 4.785954892576465e-06, | |
| "loss": 0.8153, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8606498194945849, | |
| "grad_norm": 0.02621045894920826, | |
| "learning_rate": 4.786927121247155e-06, | |
| "loss": 0.8164, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8618050541516246, | |
| "grad_norm": 0.02661043219268322, | |
| "learning_rate": 4.787898045787781e-06, | |
| "loss": 0.7877, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8629602888086643, | |
| "grad_norm": 0.02501833625137806, | |
| "learning_rate": 4.788867669692332e-06, | |
| "loss": 0.8096, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.864115523465704, | |
| "grad_norm": 0.025194406509399414, | |
| "learning_rate": 4.78983599644077e-06, | |
| "loss": 0.791, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8652707581227437, | |
| "grad_norm": 0.026089461520314217, | |
| "learning_rate": 4.790803029499111e-06, | |
| "loss": 0.7906, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.8664259927797834, | |
| "grad_norm": 0.025562454015016556, | |
| "learning_rate": 4.7917687723195e-06, | |
| "loss": 0.7749, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8675812274368231, | |
| "grad_norm": 0.025910962373018265, | |
| "learning_rate": 4.792733228340281e-06, | |
| "loss": 0.7991, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.8687364620938628, | |
| "grad_norm": 0.026650816202163696, | |
| "learning_rate": 4.793696400986071e-06, | |
| "loss": 0.7999, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8698916967509025, | |
| "grad_norm": 0.025604519993066788, | |
| "learning_rate": 4.794658293667835e-06, | |
| "loss": 0.8337, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.8710469314079422, | |
| "grad_norm": 0.02709670551121235, | |
| "learning_rate": 4.795618909782958e-06, | |
| "loss": 0.8128, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8722021660649819, | |
| "grad_norm": 0.027338657528162003, | |
| "learning_rate": 4.7965782527153145e-06, | |
| "loss": 0.8088, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8733574007220216, | |
| "grad_norm": 0.028811268508434296, | |
| "learning_rate": 4.797536325835345e-06, | |
| "loss": 0.8195, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8745126353790613, | |
| "grad_norm": 0.02585168555378914, | |
| "learning_rate": 4.798493132500121e-06, | |
| "loss": 0.8067, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8756678700361011, | |
| "grad_norm": 0.024853166192770004, | |
| "learning_rate": 4.799448676053423e-06, | |
| "loss": 0.8032, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.8768231046931408, | |
| "grad_norm": 0.030403772369027138, | |
| "learning_rate": 4.800402959825802e-06, | |
| "loss": 0.8369, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8779783393501805, | |
| "grad_norm": 0.02667844668030739, | |
| "learning_rate": 4.801355987134653e-06, | |
| "loss": 0.8112, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8791335740072203, | |
| "grad_norm": 0.026796355843544006, | |
| "learning_rate": 4.802307761284289e-06, | |
| "loss": 0.819, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.88028880866426, | |
| "grad_norm": 0.02874310314655304, | |
| "learning_rate": 4.803258285566001e-06, | |
| "loss": 0.8149, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8814440433212997, | |
| "grad_norm": 0.027145931497216225, | |
| "learning_rate": 4.804207563258135e-06, | |
| "loss": 0.8095, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.8825992779783394, | |
| "grad_norm": 0.02635267935693264, | |
| "learning_rate": 4.80515559762615e-06, | |
| "loss": 0.8188, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8837545126353791, | |
| "grad_norm": 0.028352461755275726, | |
| "learning_rate": 4.806102391922697e-06, | |
| "loss": 0.8248, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8849097472924188, | |
| "grad_norm": 0.026468653231859207, | |
| "learning_rate": 4.8070479493876735e-06, | |
| "loss": 0.8083, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8860649819494585, | |
| "grad_norm": 0.02515551634132862, | |
| "learning_rate": 4.8079922732483016e-06, | |
| "loss": 0.8104, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.8872202166064982, | |
| "grad_norm": 0.02771628648042679, | |
| "learning_rate": 4.808935366719187e-06, | |
| "loss": 0.8083, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8883754512635379, | |
| "grad_norm": 0.0274192001670599, | |
| "learning_rate": 4.8098772330023855e-06, | |
| "loss": 0.8181, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8895306859205776, | |
| "grad_norm": 0.025738628581166267, | |
| "learning_rate": 4.81081787528747e-06, | |
| "loss": 0.8166, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8906859205776173, | |
| "grad_norm": 0.026444412767887115, | |
| "learning_rate": 4.8117572967515955e-06, | |
| "loss": 0.8061, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.891841155234657, | |
| "grad_norm": 0.027295244857668877, | |
| "learning_rate": 4.812695500559561e-06, | |
| "loss": 0.796, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8929963898916967, | |
| "grad_norm": 0.029825204983353615, | |
| "learning_rate": 4.813632489863876e-06, | |
| "loss": 0.8269, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.8941516245487364, | |
| "grad_norm": 0.027049973607063293, | |
| "learning_rate": 4.814568267804822e-06, | |
| "loss": 0.8166, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8953068592057761, | |
| "grad_norm": 0.027095604687929153, | |
| "learning_rate": 4.815502837510518e-06, | |
| "loss": 0.8006, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8964620938628158, | |
| "grad_norm": 0.02874009497463703, | |
| "learning_rate": 4.816436202096981e-06, | |
| "loss": 0.7987, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8976173285198557, | |
| "grad_norm": 0.027292495593428612, | |
| "learning_rate": 4.817368364668191e-06, | |
| "loss": 0.8148, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8987725631768954, | |
| "grad_norm": 0.025094101205468178, | |
| "learning_rate": 4.8182993283161485e-06, | |
| "loss": 0.833, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8999277978339351, | |
| "grad_norm": 0.026829306036233902, | |
| "learning_rate": 4.819229096120941e-06, | |
| "loss": 0.8245, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.9010830324909748, | |
| "grad_norm": 0.0263043362647295, | |
| "learning_rate": 4.820157671150801e-06, | |
| "loss": 0.826, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9022382671480145, | |
| "grad_norm": 0.029344236478209496, | |
| "learning_rate": 4.821085056462168e-06, | |
| "loss": 0.8322, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.9033935018050542, | |
| "grad_norm": 0.02591596357524395, | |
| "learning_rate": 4.822011255099747e-06, | |
| "loss": 0.8305, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.9045487364620939, | |
| "grad_norm": 0.02538052201271057, | |
| "learning_rate": 4.8229362700965724e-06, | |
| "loss": 0.8119, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.9057039711191336, | |
| "grad_norm": 0.028109556064009666, | |
| "learning_rate": 4.823860104474065e-06, | |
| "loss": 0.8334, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9068592057761733, | |
| "grad_norm": 0.027156807482242584, | |
| "learning_rate": 4.8247827612420875e-06, | |
| "loss": 0.8148, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.908014440433213, | |
| "grad_norm": 0.02679731696844101, | |
| "learning_rate": 4.825704243399014e-06, | |
| "loss": 0.8228, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9091696750902527, | |
| "grad_norm": 0.026814332231879234, | |
| "learning_rate": 4.8266245539317745e-06, | |
| "loss": 0.8027, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.9103249097472924, | |
| "grad_norm": 0.02507951483130455, | |
| "learning_rate": 4.827543695815926e-06, | |
| "loss": 0.796, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9114801444043321, | |
| "grad_norm": 0.026251574978232384, | |
| "learning_rate": 4.828461672015701e-06, | |
| "loss": 0.8125, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.9126353790613718, | |
| "grad_norm": 0.02636023238301277, | |
| "learning_rate": 4.82937848548407e-06, | |
| "loss": 0.7954, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.9137906137184115, | |
| "grad_norm": 0.026472898200154305, | |
| "learning_rate": 4.830294139162795e-06, | |
| "loss": 0.8045, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.9149458483754512, | |
| "grad_norm": 0.025792265310883522, | |
| "learning_rate": 4.83120863598249e-06, | |
| "loss": 0.8072, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.9161010830324909, | |
| "grad_norm": 0.028488213196396828, | |
| "learning_rate": 4.832121978862673e-06, | |
| "loss": 0.7935, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.9172563176895306, | |
| "grad_norm": 0.02507089264690876, | |
| "learning_rate": 4.833034170711828e-06, | |
| "loss": 0.7944, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9184115523465703, | |
| "grad_norm": 0.026674294844269753, | |
| "learning_rate": 4.833945214427451e-06, | |
| "loss": 0.8029, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.91956678700361, | |
| "grad_norm": 0.02713027596473694, | |
| "learning_rate": 4.834855112896116e-06, | |
| "loss": 0.8342, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9207220216606499, | |
| "grad_norm": 0.026090459898114204, | |
| "learning_rate": 4.835763868993521e-06, | |
| "loss": 0.7986, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.9218772563176896, | |
| "grad_norm": 0.02631318010389805, | |
| "learning_rate": 4.83667148558455e-06, | |
| "loss": 0.8021, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9230324909747293, | |
| "grad_norm": 0.027506891638040543, | |
| "learning_rate": 4.8375779655233195e-06, | |
| "loss": 0.8305, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.924187725631769, | |
| "grad_norm": 0.02776806242763996, | |
| "learning_rate": 4.83848331165324e-06, | |
| "loss": 0.8141, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9253429602888087, | |
| "grad_norm": 0.027539484202861786, | |
| "learning_rate": 4.839387526807064e-06, | |
| "loss": 0.809, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.9264981949458484, | |
| "grad_norm": 0.02546604722738266, | |
| "learning_rate": 4.84029061380694e-06, | |
| "loss": 0.8075, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9276534296028881, | |
| "grad_norm": 0.02793939970433712, | |
| "learning_rate": 4.841192575464469e-06, | |
| "loss": 0.8116, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.9288086642599278, | |
| "grad_norm": 0.026220016181468964, | |
| "learning_rate": 4.842093414580753e-06, | |
| "loss": 0.8085, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9299638989169675, | |
| "grad_norm": 0.0267406664788723, | |
| "learning_rate": 4.842993133946448e-06, | |
| "loss": 0.8202, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.9311191335740072, | |
| "grad_norm": 0.028274521231651306, | |
| "learning_rate": 4.8438917363418184e-06, | |
| "loss": 0.8222, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9322743682310469, | |
| "grad_norm": 0.025681311264634132, | |
| "learning_rate": 4.844789224536785e-06, | |
| "loss": 0.8009, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.9334296028880866, | |
| "grad_norm": 0.02876345068216324, | |
| "learning_rate": 4.845685601290977e-06, | |
| "loss": 0.8129, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.9345848375451263, | |
| "grad_norm": 0.027744382619857788, | |
| "learning_rate": 4.846580869353787e-06, | |
| "loss": 0.8084, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.935740072202166, | |
| "grad_norm": 0.026011621579527855, | |
| "learning_rate": 4.847475031464417e-06, | |
| "loss": 0.8058, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9368953068592057, | |
| "grad_norm": 0.028499187901616096, | |
| "learning_rate": 4.848368090351928e-06, | |
| "loss": 0.8, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.9380505415162455, | |
| "grad_norm": 0.026256656274199486, | |
| "learning_rate": 4.849260048735293e-06, | |
| "loss": 0.8071, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9392057761732852, | |
| "grad_norm": 0.0278424471616745, | |
| "learning_rate": 4.8501509093234474e-06, | |
| "loss": 0.8173, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.9403610108303249, | |
| "grad_norm": 0.028124138712882996, | |
| "learning_rate": 4.851040674815336e-06, | |
| "loss": 0.8057, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9415162454873646, | |
| "grad_norm": 0.02599998749792576, | |
| "learning_rate": 4.851929347899962e-06, | |
| "loss": 0.7952, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.9426714801444044, | |
| "grad_norm": 0.03042020834982395, | |
| "learning_rate": 4.852816931256436e-06, | |
| "loss": 0.815, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9438267148014441, | |
| "grad_norm": 0.02526993304491043, | |
| "learning_rate": 4.853703427554027e-06, | |
| "loss": 0.8169, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.9449819494584838, | |
| "grad_norm": 0.029504787176847458, | |
| "learning_rate": 4.854588839452205e-06, | |
| "loss": 0.8134, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.9461371841155235, | |
| "grad_norm": 0.02730252593755722, | |
| "learning_rate": 4.855473169600699e-06, | |
| "loss": 0.8198, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.9472924187725632, | |
| "grad_norm": 0.02669229544699192, | |
| "learning_rate": 4.856356420639528e-06, | |
| "loss": 0.8426, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9484476534296029, | |
| "grad_norm": 0.029394106939435005, | |
| "learning_rate": 4.857238595199068e-06, | |
| "loss": 0.8025, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.9496028880866426, | |
| "grad_norm": 0.026510139927268028, | |
| "learning_rate": 4.858119695900084e-06, | |
| "loss": 0.8267, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.9507581227436823, | |
| "grad_norm": 0.025655683130025864, | |
| "learning_rate": 4.858999725353784e-06, | |
| "loss": 0.7936, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.951913357400722, | |
| "grad_norm": 0.029900815337896347, | |
| "learning_rate": 4.859878686161861e-06, | |
| "loss": 0.8245, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.9530685920577617, | |
| "grad_norm": 0.02642049826681614, | |
| "learning_rate": 4.860756580916543e-06, | |
| "loss": 0.8036, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.9542238267148014, | |
| "grad_norm": 0.027764586731791496, | |
| "learning_rate": 4.861633412200637e-06, | |
| "loss": 0.8132, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.9553790613718411, | |
| "grad_norm": 0.02786201983690262, | |
| "learning_rate": 4.862509182587578e-06, | |
| "loss": 0.8189, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.9565342960288808, | |
| "grad_norm": 0.02661985345184803, | |
| "learning_rate": 4.8633838946414675e-06, | |
| "loss": 0.8056, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9576895306859206, | |
| "grad_norm": 0.029807835817337036, | |
| "learning_rate": 4.864257550917123e-06, | |
| "loss": 0.8108, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.9588447653429603, | |
| "grad_norm": 0.025031132623553276, | |
| "learning_rate": 4.8651301539601235e-06, | |
| "loss": 0.8031, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.028132742270827293, | |
| "learning_rate": 4.866001706306853e-06, | |
| "loss": 0.8199, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.9611552346570397, | |
| "grad_norm": 0.02886558324098587, | |
| "learning_rate": 4.866872210484541e-06, | |
| "loss": 0.8175, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9623104693140794, | |
| "grad_norm": 0.026310063898563385, | |
| "learning_rate": 4.867741669011313e-06, | |
| "loss": 0.7998, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.9634657039711191, | |
| "grad_norm": 0.02729332074522972, | |
| "learning_rate": 4.868610084396232e-06, | |
| "loss": 0.7809, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9646209386281589, | |
| "grad_norm": 0.027129124850034714, | |
| "learning_rate": 4.869477459139337e-06, | |
| "loss": 0.8219, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.9657761732851986, | |
| "grad_norm": 0.027403220534324646, | |
| "learning_rate": 4.8703437957316945e-06, | |
| "loss": 0.807, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9669314079422383, | |
| "grad_norm": 0.028556736186146736, | |
| "learning_rate": 4.871209096655434e-06, | |
| "loss": 0.8213, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.968086642599278, | |
| "grad_norm": 0.027089878916740417, | |
| "learning_rate": 4.872073364383795e-06, | |
| "loss": 0.8167, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9692418772563177, | |
| "grad_norm": 0.02591611072421074, | |
| "learning_rate": 4.872936601381167e-06, | |
| "loss": 0.7926, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.9703971119133574, | |
| "grad_norm": 0.028653450310230255, | |
| "learning_rate": 4.873798810103137e-06, | |
| "loss": 0.769, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9715523465703971, | |
| "grad_norm": 0.025429286062717438, | |
| "learning_rate": 4.874659992996521e-06, | |
| "loss": 0.8033, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.9727075812274368, | |
| "grad_norm": 0.027507685124874115, | |
| "learning_rate": 4.875520152499416e-06, | |
| "loss": 0.7991, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9738628158844765, | |
| "grad_norm": 0.02781766653060913, | |
| "learning_rate": 4.876379291041238e-06, | |
| "loss": 0.811, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.9750180505415162, | |
| "grad_norm": 0.02732367254793644, | |
| "learning_rate": 4.87723741104276e-06, | |
| "loss": 0.7795, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.976173285198556, | |
| "grad_norm": 0.027977894991636276, | |
| "learning_rate": 4.878094514916154e-06, | |
| "loss": 0.8024, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.9773285198555957, | |
| "grad_norm": 0.02706928178668022, | |
| "learning_rate": 4.87895060506504e-06, | |
| "loss": 0.8273, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9784837545126354, | |
| "grad_norm": 0.027973534539341927, | |
| "learning_rate": 4.879805683884512e-06, | |
| "loss": 0.8249, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.9796389891696751, | |
| "grad_norm": 0.028234517201781273, | |
| "learning_rate": 4.88065975376119e-06, | |
| "loss": 0.8009, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9807942238267148, | |
| "grad_norm": 0.026143617928028107, | |
| "learning_rate": 4.881512817073255e-06, | |
| "loss": 0.8114, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.9819494584837545, | |
| "grad_norm": 0.027196498587727547, | |
| "learning_rate": 4.882364876190489e-06, | |
| "loss": 0.7919, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9831046931407942, | |
| "grad_norm": 0.027922434732317924, | |
| "learning_rate": 4.883215933474314e-06, | |
| "loss": 0.7986, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.9842599277978339, | |
| "grad_norm": 0.0268290676176548, | |
| "learning_rate": 4.884065991277834e-06, | |
| "loss": 0.8142, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9854151624548736, | |
| "grad_norm": 0.026363542303442955, | |
| "learning_rate": 4.8849150519458725e-06, | |
| "loss": 0.7958, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9865703971119134, | |
| "grad_norm": 0.026863981038331985, | |
| "learning_rate": 4.88576311781501e-06, | |
| "loss": 0.8132, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9877256317689531, | |
| "grad_norm": 0.026298578828573227, | |
| "learning_rate": 4.886610191213622e-06, | |
| "loss": 0.8156, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9888808664259928, | |
| "grad_norm": 0.026729200035333633, | |
| "learning_rate": 4.8874562744619224e-06, | |
| "loss": 0.8221, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9900361010830325, | |
| "grad_norm": 0.025892335921525955, | |
| "learning_rate": 4.888301369871998e-06, | |
| "loss": 0.8267, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.9911913357400722, | |
| "grad_norm": 0.026062045246362686, | |
| "learning_rate": 4.8891454797478435e-06, | |
| "loss": 0.8206, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9923465703971119, | |
| "grad_norm": 0.025573009625077248, | |
| "learning_rate": 4.889988606385404e-06, | |
| "loss": 0.8466, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.9935018050541516, | |
| "grad_norm": 0.026895977556705475, | |
| "learning_rate": 4.890830752072613e-06, | |
| "loss": 0.801, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9946570397111913, | |
| "grad_norm": 0.026373952627182007, | |
| "learning_rate": 4.891671919089425e-06, | |
| "loss": 0.7935, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.995812274368231, | |
| "grad_norm": 0.025112492963671684, | |
| "learning_rate": 4.892512109707855e-06, | |
| "loss": 0.8104, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9969675090252708, | |
| "grad_norm": 0.025861382484436035, | |
| "learning_rate": 4.893351326192016e-06, | |
| "loss": 0.7755, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.9981227436823105, | |
| "grad_norm": 0.025520671159029007, | |
| "learning_rate": 4.8941895707981555e-06, | |
| "loss": 0.783, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9992779783393502, | |
| "grad_norm": 0.02590048499405384, | |
| "learning_rate": 4.895026845774691e-06, | |
| "loss": 0.7952, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.00043321299639, | |
| "grad_norm": 0.037370167672634125, | |
| "learning_rate": 4.895863153362244e-06, | |
| "loss": 1.0888, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.0015884476534296, | |
| "grad_norm": 0.028089042752981186, | |
| "learning_rate": 4.8966984957936845e-06, | |
| "loss": 0.8026, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.0027436823104694, | |
| "grad_norm": 0.02722124569118023, | |
| "learning_rate": 4.897532875294154e-06, | |
| "loss": 0.7892, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.003898916967509, | |
| "grad_norm": 0.02784401923418045, | |
| "learning_rate": 4.898366294081111e-06, | |
| "loss": 0.7766, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.0050541516245488, | |
| "grad_norm": 0.028576616197824478, | |
| "learning_rate": 4.899198754364365e-06, | |
| "loss": 0.794, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0062093862815884, | |
| "grad_norm": 0.02609376236796379, | |
| "learning_rate": 4.900030258346106e-06, | |
| "loss": 0.787, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.0073646209386282, | |
| "grad_norm": 0.026441458612680435, | |
| "learning_rate": 4.900860808220946e-06, | |
| "loss": 0.7726, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0085198555956678, | |
| "grad_norm": 0.026604607701301575, | |
| "learning_rate": 4.90169040617595e-06, | |
| "loss": 0.7672, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.0096750902527076, | |
| "grad_norm": 0.025642454624176025, | |
| "learning_rate": 4.902519054390672e-06, | |
| "loss": 0.7804, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0108303249097472, | |
| "grad_norm": 0.027134709060192108, | |
| "learning_rate": 4.9033467550371886e-06, | |
| "loss": 0.7895, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.011985559566787, | |
| "grad_norm": 0.02571208029985428, | |
| "learning_rate": 4.904173510280135e-06, | |
| "loss": 0.7916, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.0131407942238266, | |
| "grad_norm": 0.026592353358864784, | |
| "learning_rate": 4.904999322276735e-06, | |
| "loss": 0.7975, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.0142960288808665, | |
| "grad_norm": 0.02615254372358322, | |
| "learning_rate": 4.905824193176839e-06, | |
| "loss": 0.7881, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.015451263537906, | |
| "grad_norm": 0.027889380231499672, | |
| "learning_rate": 4.906648125122953e-06, | |
| "loss": 0.7897, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.0166064981949459, | |
| "grad_norm": 0.02607213519513607, | |
| "learning_rate": 4.907471120250281e-06, | |
| "loss": 0.7735, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0177617328519855, | |
| "grad_norm": 0.025891225785017014, | |
| "learning_rate": 4.9082931806867475e-06, | |
| "loss": 0.788, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.0189169675090253, | |
| "grad_norm": 0.02776011824607849, | |
| "learning_rate": 4.909114308553034e-06, | |
| "loss": 0.7902, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.020072202166065, | |
| "grad_norm": 0.026471663266420364, | |
| "learning_rate": 4.909934505962614e-06, | |
| "loss": 0.8, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.0212274368231047, | |
| "grad_norm": 0.026362843811511993, | |
| "learning_rate": 4.9107537750217884e-06, | |
| "loss": 0.7957, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0223826714801445, | |
| "grad_norm": 0.027810046449303627, | |
| "learning_rate": 4.9115721178297095e-06, | |
| "loss": 0.7986, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.023537906137184, | |
| "grad_norm": 0.02654910273849964, | |
| "learning_rate": 4.9123895364784185e-06, | |
| "loss": 0.7611, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.024693140794224, | |
| "grad_norm": 0.026264095678925514, | |
| "learning_rate": 4.913206033052878e-06, | |
| "loss": 0.808, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.0258483754512635, | |
| "grad_norm": 0.02679434046149254, | |
| "learning_rate": 4.914021609631002e-06, | |
| "loss": 0.7432, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0270036101083033, | |
| "grad_norm": 0.026940811425447464, | |
| "learning_rate": 4.914836268283691e-06, | |
| "loss": 0.7872, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.028158844765343, | |
| "grad_norm": 0.025167059153318405, | |
| "learning_rate": 4.915650011074855e-06, | |
| "loss": 0.7876, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.0293140794223827, | |
| "grad_norm": 0.026694072410464287, | |
| "learning_rate": 4.916462840061458e-06, | |
| "loss": 0.7919, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.0304693140794223, | |
| "grad_norm": 0.02730594575405121, | |
| "learning_rate": 4.917274757293539e-06, | |
| "loss": 0.7961, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0316245487364621, | |
| "grad_norm": 0.026232946664094925, | |
| "learning_rate": 4.918085764814244e-06, | |
| "loss": 0.7908, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.0327797833935017, | |
| "grad_norm": 0.027884062379598618, | |
| "learning_rate": 4.918895864659863e-06, | |
| "loss": 0.7752, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0339350180505416, | |
| "grad_norm": 0.027524368837475777, | |
| "learning_rate": 4.919705058859854e-06, | |
| "loss": 0.8114, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.0350902527075811, | |
| "grad_norm": 0.02609703689813614, | |
| "learning_rate": 4.920513349436876e-06, | |
| "loss": 0.801, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.036245487364621, | |
| "grad_norm": 0.027548260986804962, | |
| "learning_rate": 4.921320738406821e-06, | |
| "loss": 0.7736, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.0374007220216606, | |
| "grad_norm": 0.028695981949567795, | |
| "learning_rate": 4.922127227778841e-06, | |
| "loss": 0.7968, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.0385559566787004, | |
| "grad_norm": 0.02490762434899807, | |
| "learning_rate": 4.922932819555381e-06, | |
| "loss": 0.7695, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.03971119133574, | |
| "grad_norm": 0.030678994953632355, | |
| "learning_rate": 4.923737515732209e-06, | |
| "loss": 0.7764, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.0408664259927798, | |
| "grad_norm": 0.02829810604453087, | |
| "learning_rate": 4.924541318298439e-06, | |
| "loss": 0.7881, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.0420216606498194, | |
| "grad_norm": 0.028521962463855743, | |
| "learning_rate": 4.925344229236571e-06, | |
| "loss": 0.791, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.0431768953068592, | |
| "grad_norm": 0.028740065172314644, | |
| "learning_rate": 4.926146250522511e-06, | |
| "loss": 0.778, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.044332129963899, | |
| "grad_norm": 0.02634969726204872, | |
| "learning_rate": 4.926947384125606e-06, | |
| "loss": 0.7784, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.0454873646209386, | |
| "grad_norm": 0.02609422616660595, | |
| "learning_rate": 4.927747632008672e-06, | |
| "loss": 0.7729, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.0466425992779784, | |
| "grad_norm": 0.025464767590165138, | |
| "learning_rate": 4.928546996128022e-06, | |
| "loss": 0.7846, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.047797833935018, | |
| "grad_norm": 0.026383783668279648, | |
| "learning_rate": 4.929345478433492e-06, | |
| "loss": 0.7672, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.0489530685920578, | |
| "grad_norm": 0.02692020870745182, | |
| "learning_rate": 4.930143080868476e-06, | |
| "loss": 0.777, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.0501083032490974, | |
| "grad_norm": 0.026965700089931488, | |
| "learning_rate": 4.930939805369947e-06, | |
| "loss": 0.8104, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.0512635379061372, | |
| "grad_norm": 0.0277117807418108, | |
| "learning_rate": 4.931735653868489e-06, | |
| "loss": 0.7729, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.0524187725631768, | |
| "grad_norm": 0.027171596884727478, | |
| "learning_rate": 4.932530628288331e-06, | |
| "loss": 0.7957, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.0535740072202167, | |
| "grad_norm": 0.0277806855738163, | |
| "learning_rate": 4.933324730547361e-06, | |
| "loss": 0.7988, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.0547292418772563, | |
| "grad_norm": 0.02666284516453743, | |
| "learning_rate": 4.934117962557165e-06, | |
| "loss": 0.7637, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.055884476534296, | |
| "grad_norm": 0.028444141149520874, | |
| "learning_rate": 4.934910326223052e-06, | |
| "loss": 0.77, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.0570397111913357, | |
| "grad_norm": 0.028864728286862373, | |
| "learning_rate": 4.93570182344408e-06, | |
| "loss": 0.7778, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.0581949458483755, | |
| "grad_norm": 0.026793263852596283, | |
| "learning_rate": 4.936492456113085e-06, | |
| "loss": 0.7879, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.059350180505415, | |
| "grad_norm": 0.027911733835935593, | |
| "learning_rate": 4.937282226116702e-06, | |
| "loss": 0.7698, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.060505415162455, | |
| "grad_norm": 0.026382414624094963, | |
| "learning_rate": 4.938071135335405e-06, | |
| "loss": 0.7637, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.0616606498194945, | |
| "grad_norm": 0.027525540441274643, | |
| "learning_rate": 4.938859185643519e-06, | |
| "loss": 0.8057, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.0628158844765343, | |
| "grad_norm": 0.028536858037114143, | |
| "learning_rate": 4.93964637890926e-06, | |
| "loss": 0.7773, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.0639711191335741, | |
| "grad_norm": 0.026540234684944153, | |
| "learning_rate": 4.9404327169947486e-06, | |
| "loss": 0.7912, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.0651263537906137, | |
| "grad_norm": 0.026590172201395035, | |
| "learning_rate": 4.941218201756049e-06, | |
| "loss": 0.7873, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.0662815884476535, | |
| "grad_norm": 0.026739628985524178, | |
| "learning_rate": 4.942002835043187e-06, | |
| "loss": 0.7853, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.0674368231046931, | |
| "grad_norm": 0.027753658592700958, | |
| "learning_rate": 4.9427866187001785e-06, | |
| "loss": 0.7853, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.068592057761733, | |
| "grad_norm": 0.0267436932772398, | |
| "learning_rate": 4.943569554565055e-06, | |
| "loss": 0.7851, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.0697472924187725, | |
| "grad_norm": 0.02634557895362377, | |
| "learning_rate": 4.9443516444698915e-06, | |
| "loss": 0.7881, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.0709025270758123, | |
| "grad_norm": 0.027419744059443474, | |
| "learning_rate": 4.945132890240829e-06, | |
| "loss": 0.813, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.072057761732852, | |
| "grad_norm": 0.026219435036182404, | |
| "learning_rate": 4.9459132936981044e-06, | |
| "loss": 0.7998, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.0732129963898918, | |
| "grad_norm": 0.026673052459955215, | |
| "learning_rate": 4.94669285665607e-06, | |
| "loss": 0.764, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.0743682310469314, | |
| "grad_norm": 0.027227727696299553, | |
| "learning_rate": 4.9474715809232256e-06, | |
| "loss": 0.7734, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.0755234657039712, | |
| "grad_norm": 0.025797106325626373, | |
| "learning_rate": 4.948249468302239e-06, | |
| "loss": 0.7959, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.0766787003610108, | |
| "grad_norm": 0.026355307549238205, | |
| "learning_rate": 4.9490265205899695e-06, | |
| "loss": 0.7987, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.0778339350180506, | |
| "grad_norm": 0.02700088918209076, | |
| "learning_rate": 4.949802739577501e-06, | |
| "loss": 0.8067, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.0789891696750902, | |
| "grad_norm": 0.02693251334130764, | |
| "learning_rate": 4.950578127050157e-06, | |
| "loss": 0.7855, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.08014440433213, | |
| "grad_norm": 0.02637103572487831, | |
| "learning_rate": 4.95135268478753e-06, | |
| "loss": 0.7821, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.0812996389891696, | |
| "grad_norm": 0.02696199156343937, | |
| "learning_rate": 4.952126414563509e-06, | |
| "loss": 0.7954, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.0824548736462094, | |
| "grad_norm": 0.025795504450798035, | |
| "learning_rate": 4.952899318146298e-06, | |
| "loss": 0.771, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.083610108303249, | |
| "grad_norm": 0.0262451134622097, | |
| "learning_rate": 4.953671397298441e-06, | |
| "loss": 0.7737, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.0847653429602888, | |
| "grad_norm": 0.027646278962492943, | |
| "learning_rate": 4.954442653776852e-06, | |
| "loss": 0.7702, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.0859205776173284, | |
| "grad_norm": 0.026158776134252548, | |
| "learning_rate": 4.955213089332832e-06, | |
| "loss": 0.772, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.0870758122743682, | |
| "grad_norm": 0.02821963280439377, | |
| "learning_rate": 4.955982705712095e-06, | |
| "loss": 0.7828, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.088231046931408, | |
| "grad_norm": 0.026813939213752747, | |
| "learning_rate": 4.956751504654796e-06, | |
| "loss": 0.7848, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.0893862815884476, | |
| "grad_norm": 0.025459513068199158, | |
| "learning_rate": 4.957519487895548e-06, | |
| "loss": 0.7655, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.0905415162454875, | |
| "grad_norm": 0.026770230382680893, | |
| "learning_rate": 4.958286657163448e-06, | |
| "loss": 0.773, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.091696750902527, | |
| "grad_norm": 0.02704155258834362, | |
| "learning_rate": 4.959053014182106e-06, | |
| "loss": 0.7755, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.0928519855595669, | |
| "grad_norm": 0.02700476534664631, | |
| "learning_rate": 4.959818560669656e-06, | |
| "loss": 0.8009, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.0940072202166065, | |
| "grad_norm": 0.028186574578285217, | |
| "learning_rate": 4.96058329833879e-06, | |
| "loss": 0.7705, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.0951624548736463, | |
| "grad_norm": 0.027806002646684647, | |
| "learning_rate": 4.9613472288967775e-06, | |
| "loss": 0.7727, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.0963176895306859, | |
| "grad_norm": 0.028479190543293953, | |
| "learning_rate": 4.962110354045489e-06, | |
| "loss": 0.7921, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.0974729241877257, | |
| "grad_norm": 0.027802351862192154, | |
| "learning_rate": 4.962872675481414e-06, | |
| "loss": 0.7827, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.0986281588447653, | |
| "grad_norm": 0.026106078177690506, | |
| "learning_rate": 4.96363419489569e-06, | |
| "loss": 0.7663, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.099783393501805, | |
| "grad_norm": 0.027961455285549164, | |
| "learning_rate": 4.964394913974124e-06, | |
| "loss": 0.7735, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.1009386281588447, | |
| "grad_norm": 0.02648748643696308, | |
| "learning_rate": 4.965154834397211e-06, | |
| "loss": 0.8092, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.1020938628158845, | |
| "grad_norm": 0.026987139135599136, | |
| "learning_rate": 4.965913957840159e-06, | |
| "loss": 0.791, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.103249097472924, | |
| "grad_norm": 0.026114538311958313, | |
| "learning_rate": 4.966672285972911e-06, | |
| "loss": 0.7682, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.104404332129964, | |
| "grad_norm": 0.027255605906248093, | |
| "learning_rate": 4.967429820460167e-06, | |
| "loss": 0.7934, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.1055595667870035, | |
| "grad_norm": 0.026975391432642937, | |
| "learning_rate": 4.968186562961406e-06, | |
| "loss": 0.7758, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.1067148014440433, | |
| "grad_norm": 0.026317190378904343, | |
| "learning_rate": 4.968942515130908e-06, | |
| "loss": 0.7949, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.1078700361010831, | |
| "grad_norm": 0.027674200013279915, | |
| "learning_rate": 4.969697678617774e-06, | |
| "loss": 0.801, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.1090252707581227, | |
| "grad_norm": 0.026397835463285446, | |
| "learning_rate": 4.970452055065948e-06, | |
| "loss": 0.7872, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1101805054151626, | |
| "grad_norm": 0.025970855727791786, | |
| "learning_rate": 4.971205646114243e-06, | |
| "loss": 0.8076, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.1113357400722021, | |
| "grad_norm": 0.026946989819407463, | |
| "learning_rate": 4.9719584533963555e-06, | |
| "loss": 0.7646, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.112490974729242, | |
| "grad_norm": 0.026165366172790527, | |
| "learning_rate": 4.9727104785408915e-06, | |
| "loss": 0.7704, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.1136462093862816, | |
| "grad_norm": 0.026523860171437263, | |
| "learning_rate": 4.973461723171385e-06, | |
| "loss": 0.7787, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1148014440433214, | |
| "grad_norm": 0.02800065465271473, | |
| "learning_rate": 4.974212188906321e-06, | |
| "loss": 0.7824, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.115956678700361, | |
| "grad_norm": 0.02660745568573475, | |
| "learning_rate": 4.974961877359156e-06, | |
| "loss": 0.8069, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.1171119133574008, | |
| "grad_norm": 0.026590686291456223, | |
| "learning_rate": 4.975710790138337e-06, | |
| "loss": 0.7603, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.1182671480144404, | |
| "grad_norm": 0.028299605473876, | |
| "learning_rate": 4.976458928847323e-06, | |
| "loss": 0.7884, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1194223826714802, | |
| "grad_norm": 0.02655804343521595, | |
| "learning_rate": 4.977206295084609e-06, | |
| "loss": 0.7902, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.1205776173285198, | |
| "grad_norm": 0.02828875742852688, | |
| "learning_rate": 4.977952890443742e-06, | |
| "loss": 0.7927, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.1217328519855596, | |
| "grad_norm": 0.03140028193593025, | |
| "learning_rate": 4.978698716513342e-06, | |
| "loss": 0.8075, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.1228880866425992, | |
| "grad_norm": 0.028589541092514992, | |
| "learning_rate": 4.9794437748771245e-06, | |
| "loss": 0.767, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.124043321299639, | |
| "grad_norm": 0.028249988332390785, | |
| "learning_rate": 4.98018806711392e-06, | |
| "loss": 0.8103, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.1251985559566786, | |
| "grad_norm": 0.027847470715641975, | |
| "learning_rate": 4.980931594797693e-06, | |
| "loss": 0.7939, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1263537906137184, | |
| "grad_norm": 0.02874613367021084, | |
| "learning_rate": 4.981674359497562e-06, | |
| "loss": 0.7986, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.1275090252707582, | |
| "grad_norm": 0.02685857191681862, | |
| "learning_rate": 4.98241636277782e-06, | |
| "loss": 0.7765, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.1286642599277978, | |
| "grad_norm": 0.026808686554431915, | |
| "learning_rate": 4.9831576061979556e-06, | |
| "loss": 0.7786, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.1298194945848374, | |
| "grad_norm": 0.02733052521944046, | |
| "learning_rate": 4.98389809131267e-06, | |
| "loss": 0.7663, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.1309747292418773, | |
| "grad_norm": 0.0273138340562582, | |
| "learning_rate": 4.984637819671897e-06, | |
| "loss": 0.8018, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.132129963898917, | |
| "grad_norm": 0.02772151120007038, | |
| "learning_rate": 4.985376792820825e-06, | |
| "loss": 0.8052, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.1332851985559567, | |
| "grad_norm": 0.0273757204413414, | |
| "learning_rate": 4.986115012299915e-06, | |
| "loss": 0.7608, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.1344404332129965, | |
| "grad_norm": 0.026899464428424835, | |
| "learning_rate": 4.986852479644916e-06, | |
| "loss": 0.8069, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.135595667870036, | |
| "grad_norm": 0.026683615520596504, | |
| "learning_rate": 4.987589196386893e-06, | |
| "loss": 0.7694, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.136750902527076, | |
| "grad_norm": 0.028294900432229042, | |
| "learning_rate": 4.988325164052236e-06, | |
| "loss": 0.7738, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.1379061371841155, | |
| "grad_norm": 0.026210768148303032, | |
| "learning_rate": 4.989060384162687e-06, | |
| "loss": 0.7624, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.1390613718411553, | |
| "grad_norm": 0.026641108095645905, | |
| "learning_rate": 4.989794858235352e-06, | |
| "loss": 0.7827, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.140216606498195, | |
| "grad_norm": 0.027551405131816864, | |
| "learning_rate": 4.990528587782728e-06, | |
| "loss": 0.7849, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.1413718411552347, | |
| "grad_norm": 0.026633942499756813, | |
| "learning_rate": 4.991261574312715e-06, | |
| "loss": 0.8019, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.1425270758122743, | |
| "grad_norm": 0.027740631252527237, | |
| "learning_rate": 4.991993819328633e-06, | |
| "loss": 0.8075, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.1436823104693141, | |
| "grad_norm": 0.027059046551585197, | |
| "learning_rate": 4.992725324329251e-06, | |
| "loss": 0.7916, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.1448375451263537, | |
| "grad_norm": 0.027994418516755104, | |
| "learning_rate": 4.993456090808793e-06, | |
| "loss": 0.7943, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.1459927797833935, | |
| "grad_norm": 0.026348162442445755, | |
| "learning_rate": 4.994186120256964e-06, | |
| "loss": 0.7993, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.1471480144404331, | |
| "grad_norm": 0.027533039450645447, | |
| "learning_rate": 4.99491541415897e-06, | |
| "loss": 0.7806, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.148303249097473, | |
| "grad_norm": 0.02699647657573223, | |
| "learning_rate": 4.995643973995523e-06, | |
| "loss": 0.7753, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.1494584837545125, | |
| "grad_norm": 0.025739947333931923, | |
| "learning_rate": 4.996371801242876e-06, | |
| "loss": 0.7487, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.1506137184115524, | |
| "grad_norm": 0.026720624417066574, | |
| "learning_rate": 4.997098897372832e-06, | |
| "loss": 0.7776, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.1517689530685922, | |
| "grad_norm": 0.027732260525226593, | |
| "learning_rate": 4.99782526385276e-06, | |
| "loss": 0.7718, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.1529241877256318, | |
| "grad_norm": 0.026213763281702995, | |
| "learning_rate": 4.998550902145619e-06, | |
| "loss": 0.7849, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.1540794223826714, | |
| "grad_norm": 0.0285005122423172, | |
| "learning_rate": 4.999275813709971e-06, | |
| "loss": 0.7944, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.1552346570397112, | |
| "grad_norm": 0.025959184393286705, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8039, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.156389891696751, | |
| "grad_norm": 0.026393594220280647, | |
| "learning_rate": 4.999976851655541e-06, | |
| "loss": 0.792, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.1575451263537906, | |
| "grad_norm": 0.02741307206451893, | |
| "learning_rate": 4.999907407050885e-06, | |
| "loss": 0.7816, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.1587003610108304, | |
| "grad_norm": 0.02743341587483883, | |
| "learning_rate": 4.999791667472181e-06, | |
| "loss": 0.7473, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.15985559566787, | |
| "grad_norm": 0.025454385206103325, | |
| "learning_rate": 4.999629635062988e-06, | |
| "loss": 0.7964, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.1610108303249098, | |
| "grad_norm": 0.029390884563326836, | |
| "learning_rate": 4.999421312824233e-06, | |
| "loss": 0.7885, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.1621660649819494, | |
| "grad_norm": 0.026846162974834442, | |
| "learning_rate": 4.99916670461415e-06, | |
| "loss": 0.7553, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.1633212996389892, | |
| "grad_norm": 0.027146577835083008, | |
| "learning_rate": 4.998865815148222e-06, | |
| "loss": 0.757, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.1644765342960288, | |
| "grad_norm": 0.028116164728999138, | |
| "learning_rate": 4.998518649999077e-06, | |
| "loss": 0.793, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.1656317689530686, | |
| "grad_norm": 0.027374397963285446, | |
| "learning_rate": 4.998125215596399e-06, | |
| "loss": 0.7684, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.1667870036101082, | |
| "grad_norm": 0.026328133419156075, | |
| "learning_rate": 4.9976855192268e-06, | |
| "loss": 0.794, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.167942238267148, | |
| "grad_norm": 0.028758594766259193, | |
| "learning_rate": 4.997199569033688e-06, | |
| "loss": 0.7872, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.1690974729241876, | |
| "grad_norm": 0.026665737852454185, | |
| "learning_rate": 4.996667374017118e-06, | |
| "loss": 0.797, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.1702527075812275, | |
| "grad_norm": 0.02736995927989483, | |
| "learning_rate": 4.996088944033622e-06, | |
| "loss": 0.7912, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.171407942238267, | |
| "grad_norm": 0.027897171676158905, | |
| "learning_rate": 4.995464289796029e-06, | |
| "loss": 0.7714, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.1725631768953069, | |
| "grad_norm": 0.027566097676753998, | |
| "learning_rate": 4.994793422873265e-06, | |
| "loss": 0.7755, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.1737184115523465, | |
| "grad_norm": 0.026376303285360336, | |
| "learning_rate": 4.994076355690139e-06, | |
| "loss": 0.8052, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.1748736462093863, | |
| "grad_norm": 0.027220716699957848, | |
| "learning_rate": 4.993313101527112e-06, | |
| "loss": 0.7955, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.176028880866426, | |
| "grad_norm": 0.02771547995507717, | |
| "learning_rate": 4.992503674520057e-06, | |
| "loss": 0.7595, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.1771841155234657, | |
| "grad_norm": 0.026792939752340317, | |
| "learning_rate": 4.991648089659987e-06, | |
| "loss": 0.7812, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.1783393501805055, | |
| "grad_norm": 0.026456760242581367, | |
| "learning_rate": 4.990746362792786e-06, | |
| "loss": 0.7884, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.179494584837545, | |
| "grad_norm": 0.028625672683119774, | |
| "learning_rate": 4.989798510618911e-06, | |
| "loss": 0.7999, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.180649819494585, | |
| "grad_norm": 0.026891304180026054, | |
| "learning_rate": 4.988804550693086e-06, | |
| "loss": 0.7949, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.1818050541516245, | |
| "grad_norm": 0.025918442755937576, | |
| "learning_rate": 4.9877645014239686e-06, | |
| "loss": 0.793, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.1829602888086643, | |
| "grad_norm": 0.026719292625784874, | |
| "learning_rate": 4.9866783820738255e-06, | |
| "loss": 0.7915, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.184115523465704, | |
| "grad_norm": 0.027593158185482025, | |
| "learning_rate": 4.985546212758157e-06, | |
| "loss": 0.7678, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.1852707581227437, | |
| "grad_norm": 0.02738940343260765, | |
| "learning_rate": 4.984368014445336e-06, | |
| "loss": 0.79, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.1864259927797833, | |
| "grad_norm": 0.02584446780383587, | |
| "learning_rate": 4.983143808956219e-06, | |
| "loss": 0.7638, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.1875812274368232, | |
| "grad_norm": 0.03016565926373005, | |
| "learning_rate": 4.981873618963737e-06, | |
| "loss": 0.7906, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.1887364620938627, | |
| "grad_norm": 0.026769449934363365, | |
| "learning_rate": 4.980557467992476e-06, | |
| "loss": 0.7743, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.1898916967509026, | |
| "grad_norm": 0.02779136225581169, | |
| "learning_rate": 4.97919538041825e-06, | |
| "loss": 0.7917, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.1910469314079422, | |
| "grad_norm": 0.028257260099053383, | |
| "learning_rate": 4.977787381467638e-06, | |
| "loss": 0.774, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.192202166064982, | |
| "grad_norm": 0.026401590555906296, | |
| "learning_rate": 4.976333497217524e-06, | |
| "loss": 0.7989, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.1933574007220216, | |
| "grad_norm": 0.028177831321954727, | |
| "learning_rate": 4.974833754594611e-06, | |
| "loss": 0.7832, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.1945126353790614, | |
| "grad_norm": 0.028338629752397537, | |
| "learning_rate": 4.9732881813749236e-06, | |
| "loss": 0.7991, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.1956678700361012, | |
| "grad_norm": 0.027354028075933456, | |
| "learning_rate": 4.9716968061832935e-06, | |
| "loss": 0.7917, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.1968231046931408, | |
| "grad_norm": 0.02721875160932541, | |
| "learning_rate": 4.9700596584928275e-06, | |
| "loss": 0.7765, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.1979783393501804, | |
| "grad_norm": 0.026294628158211708, | |
| "learning_rate": 4.968376768624368e-06, | |
| "loss": 0.7871, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.1991335740072202, | |
| "grad_norm": 0.027313537895679474, | |
| "learning_rate": 4.96664816774592e-06, | |
| "loss": 0.7858, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.20028880866426, | |
| "grad_norm": 0.02785896323621273, | |
| "learning_rate": 4.964873887872085e-06, | |
| "loss": 0.7938, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.2014440433212996, | |
| "grad_norm": 0.02687663771212101, | |
| "learning_rate": 4.963053961863463e-06, | |
| "loss": 0.7776, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2025992779783394, | |
| "grad_norm": 0.026713555678725243, | |
| "learning_rate": 4.961188423426042e-06, | |
| "loss": 0.7741, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.203754512635379, | |
| "grad_norm": 0.02718759886920452, | |
| "learning_rate": 4.959277307110582e-06, | |
| "loss": 0.7667, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.2049097472924188, | |
| "grad_norm": 0.027160272002220154, | |
| "learning_rate": 4.957320648311961e-06, | |
| "loss": 0.7894, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.2060649819494584, | |
| "grad_norm": 0.02789607271552086, | |
| "learning_rate": 4.955318483268535e-06, | |
| "loss": 0.7812, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.2072202166064983, | |
| "grad_norm": 0.02740464359521866, | |
| "learning_rate": 4.953270849061456e-06, | |
| "loss": 0.7585, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.2083754512635378, | |
| "grad_norm": 0.028817661106586456, | |
| "learning_rate": 4.951177783613991e-06, | |
| "loss": 0.8091, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2095306859205777, | |
| "grad_norm": 0.026185913011431694, | |
| "learning_rate": 4.9490393256908144e-06, | |
| "loss": 0.7956, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.2106859205776173, | |
| "grad_norm": 0.029244182631373405, | |
| "learning_rate": 4.946855514897297e-06, | |
| "loss": 0.7779, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.211841155234657, | |
| "grad_norm": 0.027704982087016106, | |
| "learning_rate": 4.944626391678766e-06, | |
| "loss": 0.7993, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.2129963898916967, | |
| "grad_norm": 0.02663758210837841, | |
| "learning_rate": 4.942351997319761e-06, | |
| "loss": 0.812, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2141516245487365, | |
| "grad_norm": 0.026222404092550278, | |
| "learning_rate": 4.940032373943262e-06, | |
| "loss": 0.7558, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.215306859205776, | |
| "grad_norm": 0.02578769624233246, | |
| "learning_rate": 4.937667564509923e-06, | |
| "loss": 0.7781, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.216462093862816, | |
| "grad_norm": 0.02732442319393158, | |
| "learning_rate": 4.935257612817259e-06, | |
| "loss": 0.7916, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.2176173285198555, | |
| "grad_norm": 0.029004095122218132, | |
| "learning_rate": 4.932802563498847e-06, | |
| "loss": 0.7979, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2187725631768953, | |
| "grad_norm": 0.02736952155828476, | |
| "learning_rate": 4.9303024620234966e-06, | |
| "loss": 0.7984, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.2199277978339351, | |
| "grad_norm": 0.026861999183893204, | |
| "learning_rate": 4.927757354694406e-06, | |
| "loss": 0.7986, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.2210830324909747, | |
| "grad_norm": 0.0265817791223526, | |
| "learning_rate": 4.925167288648305e-06, | |
| "loss": 0.7763, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.2222382671480145, | |
| "grad_norm": 0.028195342049002647, | |
| "learning_rate": 4.922532311854585e-06, | |
| "loss": 0.7967, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2233935018050541, | |
| "grad_norm": 0.028484554961323738, | |
| "learning_rate": 4.919852473114405e-06, | |
| "loss": 0.7926, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.224548736462094, | |
| "grad_norm": 0.025893505662679672, | |
| "learning_rate": 4.917127822059792e-06, | |
| "loss": 0.783, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2257039711191335, | |
| "grad_norm": 0.028708558529615402, | |
| "learning_rate": 4.9143584091527236e-06, | |
| "loss": 0.7954, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.2268592057761734, | |
| "grad_norm": 0.025731157511472702, | |
| "learning_rate": 4.911544285684186e-06, | |
| "loss": 0.7875, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.228014440433213, | |
| "grad_norm": 0.02737441286444664, | |
| "learning_rate": 4.9086855037732315e-06, | |
| "loss": 0.7618, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.2291696750902528, | |
| "grad_norm": 0.026509573683142662, | |
| "learning_rate": 4.9057821163660085e-06, | |
| "loss": 0.7513, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.2303249097472924, | |
| "grad_norm": 0.027171408757567406, | |
| "learning_rate": 4.9028341772347845e-06, | |
| "loss": 0.7825, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.2314801444043322, | |
| "grad_norm": 0.02830067090690136, | |
| "learning_rate": 4.899841740976947e-06, | |
| "loss": 0.7784, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.2326353790613718, | |
| "grad_norm": 0.02830381691455841, | |
| "learning_rate": 4.8968048630139945e-06, | |
| "loss": 0.8057, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.2337906137184116, | |
| "grad_norm": 0.027096690610051155, | |
| "learning_rate": 4.89372359959051e-06, | |
| "loss": 0.778, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.2349458483754512, | |
| "grad_norm": 0.026616832241415977, | |
| "learning_rate": 4.890598007773116e-06, | |
| "loss": 0.7719, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.236101083032491, | |
| "grad_norm": 0.02842404507100582, | |
| "learning_rate": 4.887428145449425e-06, | |
| "loss": 0.7898, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.2372563176895306, | |
| "grad_norm": 0.029439568519592285, | |
| "learning_rate": 4.884214071326957e-06, | |
| "loss": 0.7586, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.2384115523465704, | |
| "grad_norm": 0.027274589985609055, | |
| "learning_rate": 4.880955844932061e-06, | |
| "loss": 0.7779, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.2395667870036102, | |
| "grad_norm": 0.030161483213305473, | |
| "learning_rate": 4.877653526608811e-06, | |
| "loss": 0.7774, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.2407220216606498, | |
| "grad_norm": 0.026620803400874138, | |
| "learning_rate": 4.874307177517882e-06, | |
| "loss": 0.7714, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.2418772563176894, | |
| "grad_norm": 0.031550221145153046, | |
| "learning_rate": 4.870916859635426e-06, | |
| "loss": 0.7951, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.2430324909747292, | |
| "grad_norm": 0.02677975781261921, | |
| "learning_rate": 4.86748263575192e-06, | |
| "loss": 0.8094, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.244187725631769, | |
| "grad_norm": 0.02815908007323742, | |
| "learning_rate": 4.864004569471e-06, | |
| "loss": 0.7828, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.2453429602888086, | |
| "grad_norm": 0.028923533856868744, | |
| "learning_rate": 4.860482725208291e-06, | |
| "loss": 0.8106, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.2464981949458485, | |
| "grad_norm": 0.026107341051101685, | |
| "learning_rate": 4.856917168190205e-06, | |
| "loss": 0.7893, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.247653429602888, | |
| "grad_norm": 0.029115671291947365, | |
| "learning_rate": 4.853307964452739e-06, | |
| "loss": 0.749, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.2488086642599279, | |
| "grad_norm": 0.02833879366517067, | |
| "learning_rate": 4.849655180840253e-06, | |
| "loss": 0.8041, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.2499638989169675, | |
| "grad_norm": 0.027344336733222008, | |
| "learning_rate": 4.8459588850042255e-06, | |
| "loss": 0.7455, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.2511191335740073, | |
| "grad_norm": 0.02860669605433941, | |
| "learning_rate": 4.8422191454020055e-06, | |
| "loss": 0.7995, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.2522743682310469, | |
| "grad_norm": 0.026610156521201134, | |
| "learning_rate": 4.838436031295543e-06, | |
| "loss": 0.7776, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.2534296028880867, | |
| "grad_norm": 0.029180224984884262, | |
| "learning_rate": 4.834609612750108e-06, | |
| "loss": 0.7755, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.2545848375451263, | |
| "grad_norm": 0.025666316971182823, | |
| "learning_rate": 4.8307399606329906e-06, | |
| "loss": 0.7815, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.255740072202166, | |
| "grad_norm": 0.028815045952796936, | |
| "learning_rate": 4.82682714661219e-06, | |
| "loss": 0.7618, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.2568953068592057, | |
| "grad_norm": 0.02659596875309944, | |
| "learning_rate": 4.822871243155085e-06, | |
| "loss": 0.7906, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.2580505415162455, | |
| "grad_norm": 0.027543647214770317, | |
| "learning_rate": 4.818872323527097e-06, | |
| "loss": 0.7871, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.2592057761732853, | |
| "grad_norm": 0.027373237535357475, | |
| "learning_rate": 4.814830461790327e-06, | |
| "loss": 0.763, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.260361010830325, | |
| "grad_norm": 0.025914175435900688, | |
| "learning_rate": 4.810745732802186e-06, | |
| "loss": 0.7744, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.2615162454873645, | |
| "grad_norm": 0.026502788066864014, | |
| "learning_rate": 4.806618212214009e-06, | |
| "loss": 0.7901, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.2626714801444043, | |
| "grad_norm": 0.026811202988028526, | |
| "learning_rate": 4.802447976469658e-06, | |
| "loss": 0.797, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.2638267148014442, | |
| "grad_norm": 0.027904195711016655, | |
| "learning_rate": 4.7982351028040966e-06, | |
| "loss": 0.7956, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.2649819494584837, | |
| "grad_norm": 0.025807814672589302, | |
| "learning_rate": 4.79397966924197e-06, | |
| "loss": 0.7785, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.2661371841155233, | |
| "grad_norm": 0.02669427916407585, | |
| "learning_rate": 4.789681754596152e-06, | |
| "loss": 0.7882, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.2672924187725632, | |
| "grad_norm": 0.026608362793922424, | |
| "learning_rate": 4.785341438466289e-06, | |
| "loss": 0.7716, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.268447653429603, | |
| "grad_norm": 0.025953490287065506, | |
| "learning_rate": 4.7809588012373255e-06, | |
| "loss": 0.7702, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.2696028880866426, | |
| "grad_norm": 0.02710854634642601, | |
| "learning_rate": 4.776533924078018e-06, | |
| "loss": 0.7734, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.2707581227436824, | |
| "grad_norm": 0.027277441695332527, | |
| "learning_rate": 4.772066888939423e-06, | |
| "loss": 0.7894, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.271913357400722, | |
| "grad_norm": 0.025714052841067314, | |
| "learning_rate": 4.767557778553389e-06, | |
| "loss": 0.7658, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.2730685920577618, | |
| "grad_norm": 0.02643239125609398, | |
| "learning_rate": 4.763006676431021e-06, | |
| "loss": 0.775, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.2742238267148014, | |
| "grad_norm": 0.028819050639867783, | |
| "learning_rate": 4.75841366686113e-06, | |
| "loss": 0.7823, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.2753790613718412, | |
| "grad_norm": 0.026137549430131912, | |
| "learning_rate": 4.753778834908678e-06, | |
| "loss": 0.7607, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.2765342960288808, | |
| "grad_norm": 0.027080422267317772, | |
| "learning_rate": 4.749102266413195e-06, | |
| "loss": 0.7739, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.2776895306859206, | |
| "grad_norm": 0.026252347975969315, | |
| "learning_rate": 4.744384047987202e-06, | |
| "loss": 0.7914, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.2788447653429602, | |
| "grad_norm": 0.02620067074894905, | |
| "learning_rate": 4.739624267014589e-06, | |
| "loss": 0.754, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.026302233338356018, | |
| "learning_rate": 4.734823011649013e-06, | |
| "loss": 0.7964, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.2811552346570396, | |
| "grad_norm": 0.02616049535572529, | |
| "learning_rate": 4.7299803708122565e-06, | |
| "loss": 0.7831, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.2823104693140794, | |
| "grad_norm": 0.02667761594057083, | |
| "learning_rate": 4.725096434192583e-06, | |
| "loss": 0.7651, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.2834657039711193, | |
| "grad_norm": 0.026840372011065483, | |
| "learning_rate": 4.720171292243076e-06, | |
| "loss": 0.7776, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.2846209386281588, | |
| "grad_norm": 0.027080677449703217, | |
| "learning_rate": 4.7152050361799626e-06, | |
| "loss": 0.7804, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.2857761732851984, | |
| "grad_norm": 0.026866400614380836, | |
| "learning_rate": 4.710197757980923e-06, | |
| "loss": 0.798, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.2869314079422383, | |
| "grad_norm": 0.025592336431145668, | |
| "learning_rate": 4.7051495503833955e-06, | |
| "loss": 0.7717, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.288086642599278, | |
| "grad_norm": 0.02509910985827446, | |
| "learning_rate": 4.700060506882846e-06, | |
| "loss": 0.7494, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.2892418772563177, | |
| "grad_norm": 0.02745307981967926, | |
| "learning_rate": 4.694930721731046e-06, | |
| "loss": 0.7955, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.2903971119133573, | |
| "grad_norm": 0.025139886885881424, | |
| "learning_rate": 4.689760289934322e-06, | |
| "loss": 0.7752, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.291552346570397, | |
| "grad_norm": 0.025314299389719963, | |
| "learning_rate": 4.684549307251799e-06, | |
| "loss": 0.7631, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.292707581227437, | |
| "grad_norm": 0.027501968666911125, | |
| "learning_rate": 4.679297870193628e-06, | |
| "loss": 0.7844, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.2938628158844765, | |
| "grad_norm": 0.026297206059098244, | |
| "learning_rate": 4.6740060760191905e-06, | |
| "loss": 0.74, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.2950180505415163, | |
| "grad_norm": 0.025890007615089417, | |
| "learning_rate": 4.668674022735311e-06, | |
| "loss": 0.7923, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.296173285198556, | |
| "grad_norm": 0.026110520586371422, | |
| "learning_rate": 4.663301809094425e-06, | |
| "loss": 0.7899, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.2973285198555957, | |
| "grad_norm": 0.026979975402355194, | |
| "learning_rate": 4.657889534592767e-06, | |
| "loss": 0.7831, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.2984837545126353, | |
| "grad_norm": 0.026237450540065765, | |
| "learning_rate": 4.652437299468514e-06, | |
| "loss": 0.7728, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.2996389891696751, | |
| "grad_norm": 0.02676897682249546, | |
| "learning_rate": 4.646945204699939e-06, | |
| "loss": 0.7736, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.3007942238267147, | |
| "grad_norm": 0.02639893814921379, | |
| "learning_rate": 4.641413352003534e-06, | |
| "loss": 0.7738, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.3019494584837545, | |
| "grad_norm": 0.026181381195783615, | |
| "learning_rate": 4.63584184383213e-06, | |
| "loss": 0.7766, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.3031046931407944, | |
| "grad_norm": 0.029100872576236725, | |
| "learning_rate": 4.630230783372998e-06, | |
| "loss": 0.7939, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.304259927797834, | |
| "grad_norm": 0.027207808569073677, | |
| "learning_rate": 4.624580274545938e-06, | |
| "loss": 0.7701, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.3054151624548735, | |
| "grad_norm": 0.027454031631350517, | |
| "learning_rate": 4.618890422001355e-06, | |
| "loss": 0.7979, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.3065703971119134, | |
| "grad_norm": 0.02654033899307251, | |
| "learning_rate": 4.61316133111832e-06, | |
| "loss": 0.7473, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.3077256317689532, | |
| "grad_norm": 0.02540537528693676, | |
| "learning_rate": 4.607393108002618e-06, | |
| "loss": 0.7663, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.3088808664259928, | |
| "grad_norm": 0.026679422706365585, | |
| "learning_rate": 4.601585859484786e-06, | |
| "loss": 0.7811, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.3100361010830324, | |
| "grad_norm": 0.02628672868013382, | |
| "learning_rate": 4.595739693118132e-06, | |
| "loss": 0.7836, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.3111913357400722, | |
| "grad_norm": 0.026694132015109062, | |
| "learning_rate": 4.5898547171767375e-06, | |
| "loss": 0.7708, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.312346570397112, | |
| "grad_norm": 0.02691902220249176, | |
| "learning_rate": 4.583931040653466e-06, | |
| "loss": 0.772, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3135018050541516, | |
| "grad_norm": 0.02754952199757099, | |
| "learning_rate": 4.5779687732579285e-06, | |
| "loss": 0.783, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.3146570397111914, | |
| "grad_norm": 0.025518443435430527, | |
| "learning_rate": 4.571968025414466e-06, | |
| "loss": 0.7736, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.315812274368231, | |
| "grad_norm": 0.025681914761662483, | |
| "learning_rate": 4.565928908260093e-06, | |
| "loss": 0.7823, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.3169675090252708, | |
| "grad_norm": 0.028058268129825592, | |
| "learning_rate": 4.559851533642446e-06, | |
| "loss": 0.7966, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3181227436823104, | |
| "grad_norm": 0.027302585542201996, | |
| "learning_rate": 4.55373601411771e-06, | |
| "loss": 0.8043, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.3192779783393502, | |
| "grad_norm": 0.027836551889777184, | |
| "learning_rate": 4.547582462948533e-06, | |
| "loss": 0.7903, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3204332129963898, | |
| "grad_norm": 0.027944888919591904, | |
| "learning_rate": 4.541390994101928e-06, | |
| "loss": 0.7834, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.3215884476534296, | |
| "grad_norm": 0.026407793164253235, | |
| "learning_rate": 4.535161722247168e-06, | |
| "loss": 0.7774, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3227436823104692, | |
| "grad_norm": 0.026148339733481407, | |
| "learning_rate": 4.528894762753651e-06, | |
| "loss": 0.7909, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.323898916967509, | |
| "grad_norm": 0.027284881100058556, | |
| "learning_rate": 4.522590231688775e-06, | |
| "loss": 0.7885, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.3250541516245486, | |
| "grad_norm": 0.026616675779223442, | |
| "learning_rate": 4.516248245815782e-06, | |
| "loss": 0.7924, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.3262093862815885, | |
| "grad_norm": 0.026135679334402084, | |
| "learning_rate": 4.509868922591595e-06, | |
| "loss": 0.7845, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.3273646209386283, | |
| "grad_norm": 0.027222778648138046, | |
| "learning_rate": 4.503452380164648e-06, | |
| "loss": 0.7983, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.3285198555956679, | |
| "grad_norm": 0.02566412277519703, | |
| "learning_rate": 4.496998737372691e-06, | |
| "loss": 0.7561, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.3296750902527075, | |
| "grad_norm": 0.026768745854496956, | |
| "learning_rate": 4.490508113740594e-06, | |
| "loss": 0.7753, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.3308303249097473, | |
| "grad_norm": 0.027144471183419228, | |
| "learning_rate": 4.483980629478131e-06, | |
| "loss": 0.769, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.331985559566787, | |
| "grad_norm": 0.0260478463023901, | |
| "learning_rate": 4.477416405477754e-06, | |
| "loss": 0.7879, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.3331407942238267, | |
| "grad_norm": 0.025150155648589134, | |
| "learning_rate": 4.470815563312356e-06, | |
| "loss": 0.7795, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.3342960288808663, | |
| "grad_norm": 0.026815764605998993, | |
| "learning_rate": 4.464178225233016e-06, | |
| "loss": 0.7577, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.335451263537906, | |
| "grad_norm": 0.02605642005801201, | |
| "learning_rate": 4.457504514166738e-06, | |
| "loss": 0.7617, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.336606498194946, | |
| "grad_norm": 0.02583806961774826, | |
| "learning_rate": 4.4507945537141714e-06, | |
| "loss": 0.7924, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.3377617328519855, | |
| "grad_norm": 0.027298036962747574, | |
| "learning_rate": 4.444048468147324e-06, | |
| "loss": 0.7756, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.3389169675090253, | |
| "grad_norm": 0.02701791748404503, | |
| "learning_rate": 4.4372663824072595e-06, | |
| "loss": 0.785, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.340072202166065, | |
| "grad_norm": 0.02676539123058319, | |
| "learning_rate": 4.430448422101786e-06, | |
| "loss": 0.7914, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.3412274368231047, | |
| "grad_norm": 0.026288233697414398, | |
| "learning_rate": 4.423594713503124e-06, | |
| "loss": 0.7948, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.3423826714801443, | |
| "grad_norm": 0.02532966062426567, | |
| "learning_rate": 4.416705383545574e-06, | |
| "loss": 0.7571, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.3435379061371842, | |
| "grad_norm": 0.025268996134400368, | |
| "learning_rate": 4.409780559823159e-06, | |
| "loss": 0.7544, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.3446931407942238, | |
| "grad_norm": 0.025901716202497482, | |
| "learning_rate": 4.402820370587272e-06, | |
| "loss": 0.7928, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.3458483754512636, | |
| "grad_norm": 0.026023518294095993, | |
| "learning_rate": 4.395824944744287e-06, | |
| "loss": 0.7821, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.3470036101083034, | |
| "grad_norm": 0.025839975103735924, | |
| "learning_rate": 4.388794411853183e-06, | |
| "loss": 0.7563, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.348158844765343, | |
| "grad_norm": 0.02656048908829689, | |
| "learning_rate": 4.381728902123138e-06, | |
| "loss": 0.7675, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.3493140794223826, | |
| "grad_norm": 0.025841468945145607, | |
| "learning_rate": 4.374628546411121e-06, | |
| "loss": 0.7839, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.3504693140794224, | |
| "grad_norm": 0.02622513473033905, | |
| "learning_rate": 4.367493476219464e-06, | |
| "loss": 0.7764, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.3516245487364622, | |
| "grad_norm": 0.0271604061126709, | |
| "learning_rate": 4.360323823693434e-06, | |
| "loss": 0.7873, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.3527797833935018, | |
| "grad_norm": 0.025581173598766327, | |
| "learning_rate": 4.353119721618778e-06, | |
| "loss": 0.7719, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.3539350180505414, | |
| "grad_norm": 0.027338258922100067, | |
| "learning_rate": 4.345881303419266e-06, | |
| "loss": 0.8023, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.3550902527075812, | |
| "grad_norm": 0.024792378768324852, | |
| "learning_rate": 4.338608703154224e-06, | |
| "loss": 0.7612, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.356245487364621, | |
| "grad_norm": 0.02713456004858017, | |
| "learning_rate": 4.3313020555160445e-06, | |
| "loss": 0.7735, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.3574007220216606, | |
| "grad_norm": 0.026577133685350418, | |
| "learning_rate": 4.323961495827698e-06, | |
| "loss": 0.7942, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.3585559566787004, | |
| "grad_norm": 0.02608906850218773, | |
| "learning_rate": 4.316587160040222e-06, | |
| "loss": 0.7675, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.35971119133574, | |
| "grad_norm": 0.026449469849467278, | |
| "learning_rate": 4.309179184730207e-06, | |
| "loss": 0.7586, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.3608664259927798, | |
| "grad_norm": 0.02639465406537056, | |
| "learning_rate": 4.301737707097264e-06, | |
| "loss": 0.7792, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.3620216606498194, | |
| "grad_norm": 0.026730941608548164, | |
| "learning_rate": 4.294262864961486e-06, | |
| "loss": 0.77, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.3631768953068593, | |
| "grad_norm": 0.02690793387591839, | |
| "learning_rate": 4.286754796760892e-06, | |
| "loss": 0.7898, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.3643321299638989, | |
| "grad_norm": 0.02708018571138382, | |
| "learning_rate": 4.279213641548867e-06, | |
| "loss": 0.7644, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.3654873646209387, | |
| "grad_norm": 0.02672554738819599, | |
| "learning_rate": 4.271639538991584e-06, | |
| "loss": 0.7484, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.3666425992779783, | |
| "grad_norm": 0.028112929314374924, | |
| "learning_rate": 4.264032629365418e-06, | |
| "loss": 0.8274, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.367797833935018, | |
| "grad_norm": 0.026468411087989807, | |
| "learning_rate": 4.256393053554351e-06, | |
| "loss": 0.7438, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.3689530685920577, | |
| "grad_norm": 0.02600760944187641, | |
| "learning_rate": 4.248720953047355e-06, | |
| "loss": 0.764, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.3701083032490975, | |
| "grad_norm": 0.026687582954764366, | |
| "learning_rate": 4.241016469935782e-06, | |
| "loss": 0.764, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.3712635379061373, | |
| "grad_norm": 0.028025714680552483, | |
| "learning_rate": 4.233279746910722e-06, | |
| "loss": 0.7729, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.372418772563177, | |
| "grad_norm": 0.028168534860014915, | |
| "learning_rate": 4.225510927260369e-06, | |
| "loss": 0.7901, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.3735740072202165, | |
| "grad_norm": 0.027783645316958427, | |
| "learning_rate": 4.217710154867362e-06, | |
| "loss": 0.8064, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.3747292418772563, | |
| "grad_norm": 0.025505684316158295, | |
| "learning_rate": 4.209877574206121e-06, | |
| "loss": 0.7248, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.3758844765342961, | |
| "grad_norm": 0.02887566015124321, | |
| "learning_rate": 4.2020133303401714e-06, | |
| "loss": 0.8044, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.3770397111913357, | |
| "grad_norm": 0.02724134922027588, | |
| "learning_rate": 4.1941175689194615e-06, | |
| "loss": 0.771, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.3781949458483753, | |
| "grad_norm": 0.026623480021953583, | |
| "learning_rate": 4.186190436177655e-06, | |
| "loss": 0.7926, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.3793501805054151, | |
| "grad_norm": 0.026381775736808777, | |
| "learning_rate": 4.178232078929437e-06, | |
| "loss": 0.7658, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.380505415162455, | |
| "grad_norm": 0.027352899312973022, | |
| "learning_rate": 4.170242644567781e-06, | |
| "loss": 0.7598, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.3816606498194945, | |
| "grad_norm": 0.027212858200073242, | |
| "learning_rate": 4.162222281061226e-06, | |
| "loss": 0.8047, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.3828158844765344, | |
| "grad_norm": 0.02757844887673855, | |
| "learning_rate": 4.154171136951136e-06, | |
| "loss": 0.7707, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.383971119133574, | |
| "grad_norm": 0.02665034867823124, | |
| "learning_rate": 4.146089361348948e-06, | |
| "loss": 0.7659, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.3851263537906138, | |
| "grad_norm": 0.02670939266681671, | |
| "learning_rate": 4.137977103933411e-06, | |
| "loss": 0.7898, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.3862815884476534, | |
| "grad_norm": 0.02680130861699581, | |
| "learning_rate": 4.1298345149478115e-06, | |
| "loss": 0.8019, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.3874368231046932, | |
| "grad_norm": 0.02670557238161564, | |
| "learning_rate": 4.121661745197195e-06, | |
| "loss": 0.7749, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.3885920577617328, | |
| "grad_norm": 0.02557177096605301, | |
| "learning_rate": 4.113458946045569e-06, | |
| "loss": 0.7807, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.3897472924187726, | |
| "grad_norm": 0.027189351618289948, | |
| "learning_rate": 4.1052262694131e-06, | |
| "loss": 0.8127, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.3909025270758124, | |
| "grad_norm": 0.026801222935318947, | |
| "learning_rate": 4.096963867773306e-06, | |
| "loss": 0.7887, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.392057761732852, | |
| "grad_norm": 0.027125921100378036, | |
| "learning_rate": 4.0886718941502215e-06, | |
| "loss": 0.7705, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.3932129963898916, | |
| "grad_norm": 0.027330022305250168, | |
| "learning_rate": 4.080350502115575e-06, | |
| "loss": 0.7655, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.3943682310469314, | |
| "grad_norm": 0.025932665914297104, | |
| "learning_rate": 4.071999845785938e-06, | |
| "loss": 0.7839, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.3955234657039712, | |
| "grad_norm": 0.02584053948521614, | |
| "learning_rate": 4.06362007981987e-06, | |
| "loss": 0.7676, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.3966787003610108, | |
| "grad_norm": 0.025614172220230103, | |
| "learning_rate": 4.055211359415058e-06, | |
| "loss": 0.7685, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.3978339350180504, | |
| "grad_norm": 0.026585347950458527, | |
| "learning_rate": 4.0467738403054425e-06, | |
| "loss": 0.7716, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.3989891696750902, | |
| "grad_norm": 0.02616872265934944, | |
| "learning_rate": 4.038307678758328e-06, | |
| "loss": 0.8029, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.40014440433213, | |
| "grad_norm": 0.02624761499464512, | |
| "learning_rate": 4.0298130315714934e-06, | |
| "loss": 0.766, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.4012996389891696, | |
| "grad_norm": 0.025757934898138046, | |
| "learning_rate": 4.021290056070287e-06, | |
| "loss": 0.7724, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.4024548736462095, | |
| "grad_norm": 0.027395786717534065, | |
| "learning_rate": 4.012738910104711e-06, | |
| "loss": 0.7987, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.403610108303249, | |
| "grad_norm": 0.0256296806037426, | |
| "learning_rate": 4.004159752046501e-06, | |
| "loss": 0.7516, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.4047653429602889, | |
| "grad_norm": 0.027848385274410248, | |
| "learning_rate": 3.99555274078619e-06, | |
| "loss": 0.7778, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.4059205776173285, | |
| "grad_norm": 0.02629752829670906, | |
| "learning_rate": 3.986918035730169e-06, | |
| "loss": 0.7689, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.4070758122743683, | |
| "grad_norm": 0.027596496045589447, | |
| "learning_rate": 3.9782557967977295e-06, | |
| "loss": 0.7976, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.4082310469314079, | |
| "grad_norm": 0.02730746567249298, | |
| "learning_rate": 3.969566184418109e-06, | |
| "loss": 0.7775, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.4093862815884477, | |
| "grad_norm": 0.025179557502269745, | |
| "learning_rate": 3.960849359527515e-06, | |
| "loss": 0.7639, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.4105415162454873, | |
| "grad_norm": 0.026616785675287247, | |
| "learning_rate": 3.952105483566139e-06, | |
| "loss": 0.7583, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.411696750902527, | |
| "grad_norm": 0.027852557599544525, | |
| "learning_rate": 3.94333471847518e-06, | |
| "loss": 0.7936, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.4128519855595667, | |
| "grad_norm": 0.026742927730083466, | |
| "learning_rate": 3.934537226693836e-06, | |
| "loss": 0.761, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.4140072202166065, | |
| "grad_norm": 0.025545494630932808, | |
| "learning_rate": 3.925713171156295e-06, | |
| "loss": 0.7952, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.4151624548736463, | |
| "grad_norm": 0.02676277793943882, | |
| "learning_rate": 3.9168627152887214e-06, | |
| "loss": 0.7623, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.416317689530686, | |
| "grad_norm": 0.024539409205317497, | |
| "learning_rate": 3.9079860230062286e-06, | |
| "loss": 0.7451, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.4174729241877255, | |
| "grad_norm": 0.026942811906337738, | |
| "learning_rate": 3.8990832587098396e-06, | |
| "loss": 0.7825, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.4186281588447653, | |
| "grad_norm": 0.026073571294546127, | |
| "learning_rate": 3.890154587283447e-06, | |
| "loss": 0.7788, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.4197833935018052, | |
| "grad_norm": 0.02608587220311165, | |
| "learning_rate": 3.881200174090758e-06, | |
| "loss": 0.7752, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.4209386281588448, | |
| "grad_norm": 0.02780032530426979, | |
| "learning_rate": 3.8722201849722264e-06, | |
| "loss": 0.7834, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.4220938628158843, | |
| "grad_norm": 0.025982137769460678, | |
| "learning_rate": 3.863214786241993e-06, | |
| "loss": 0.7633, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.4232490974729242, | |
| "grad_norm": 0.026244191452860832, | |
| "learning_rate": 3.85418414468479e-06, | |
| "loss": 0.7901, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.424404332129964, | |
| "grad_norm": 0.028074799105525017, | |
| "learning_rate": 3.845128427552865e-06, | |
| "loss": 0.7579, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.4255595667870036, | |
| "grad_norm": 0.027848297730088234, | |
| "learning_rate": 3.836047802562878e-06, | |
| "loss": 0.7633, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.4267148014440434, | |
| "grad_norm": 0.026308046653866768, | |
| "learning_rate": 3.8269424378927925e-06, | |
| "loss": 0.7959, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.427870036101083, | |
| "grad_norm": 0.02657618559896946, | |
| "learning_rate": 3.81781250217877e-06, | |
| "loss": 0.782, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.4290252707581228, | |
| "grad_norm": 0.02592160366475582, | |
| "learning_rate": 3.8086581645120347e-06, | |
| "loss": 0.7875, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.4301805054151624, | |
| "grad_norm": 0.02750803716480732, | |
| "learning_rate": 3.799479594435751e-06, | |
| "loss": 0.7712, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.4313357400722022, | |
| "grad_norm": 0.02752981334924698, | |
| "learning_rate": 3.7902769619418783e-06, | |
| "loss": 0.8094, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.4324909747292418, | |
| "grad_norm": 0.025965960696339607, | |
| "learning_rate": 3.7810504374680274e-06, | |
| "loss": 0.7687, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.4336462093862816, | |
| "grad_norm": 0.025762738659977913, | |
| "learning_rate": 3.7718001918942982e-06, | |
| "loss": 0.7943, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.4348014440433212, | |
| "grad_norm": 0.026879120618104935, | |
| "learning_rate": 3.762526396540121e-06, | |
| "loss": 0.7735, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.435956678700361, | |
| "grad_norm": 0.02714950405061245, | |
| "learning_rate": 3.7532292231610774e-06, | |
| "loss": 0.7641, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.4371119133574006, | |
| "grad_norm": 0.02679312974214554, | |
| "learning_rate": 3.7439088439457257e-06, | |
| "loss": 0.7627, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.4382671480144404, | |
| "grad_norm": 0.027805205434560776, | |
| "learning_rate": 3.7345654315124035e-06, | |
| "loss": 0.7811, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.4394223826714803, | |
| "grad_norm": 0.02763986401259899, | |
| "learning_rate": 3.725199158906041e-06, | |
| "loss": 0.7924, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.4405776173285199, | |
| "grad_norm": 0.026331216096878052, | |
| "learning_rate": 3.7158101995949486e-06, | |
| "loss": 0.7601, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.4417328519855594, | |
| "grad_norm": 0.025142505764961243, | |
| "learning_rate": 3.706398727467606e-06, | |
| "loss": 0.7624, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.4428880866425993, | |
| "grad_norm": 0.02704186551272869, | |
| "learning_rate": 3.696964916829443e-06, | |
| "loss": 0.7867, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.444043321299639, | |
| "grad_norm": 0.027327412739396095, | |
| "learning_rate": 3.6875089423996097e-06, | |
| "loss": 0.7861, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.4451985559566787, | |
| "grad_norm": 0.027434786781668663, | |
| "learning_rate": 3.678030979307741e-06, | |
| "loss": 0.7869, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.4463537906137185, | |
| "grad_norm": 0.02410939894616604, | |
| "learning_rate": 3.6685312030907167e-06, | |
| "loss": 0.7568, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.447509025270758, | |
| "grad_norm": 0.02866499125957489, | |
| "learning_rate": 3.6590097896894017e-06, | |
| "loss": 0.8089, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.448664259927798, | |
| "grad_norm": 0.025909971445798874, | |
| "learning_rate": 3.6494669154453978e-06, | |
| "loss": 0.794, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.4498194945848375, | |
| "grad_norm": 0.026149652898311615, | |
| "learning_rate": 3.6399027570977703e-06, | |
| "loss": 0.7657, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.4509747292418773, | |
| "grad_norm": 0.027486305683851242, | |
| "learning_rate": 3.6303174917797783e-06, | |
| "loss": 0.7573, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.452129963898917, | |
| "grad_norm": 0.026965491473674774, | |
| "learning_rate": 3.6207112970155915e-06, | |
| "loss": 0.7739, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.4532851985559567, | |
| "grad_norm": 0.026813145726919174, | |
| "learning_rate": 3.611084350717008e-06, | |
| "loss": 0.7612, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.4544404332129963, | |
| "grad_norm": 0.027455372735857964, | |
| "learning_rate": 3.60143683118015e-06, | |
| "loss": 0.7892, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.4555956678700361, | |
| "grad_norm": 0.027145925909280777, | |
| "learning_rate": 3.5917689170821714e-06, | |
| "loss": 0.7806, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.4567509025270757, | |
| "grad_norm": 0.02541586011648178, | |
| "learning_rate": 3.582080787477941e-06, | |
| "loss": 0.7772, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.4579061371841155, | |
| "grad_norm": 0.028484217822551727, | |
| "learning_rate": 3.5723726217967315e-06, | |
| "loss": 0.7926, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.4590613718411554, | |
| "grad_norm": 0.027112239971756935, | |
| "learning_rate": 3.5626445998388926e-06, | |
| "loss": 0.7819, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.460216606498195, | |
| "grad_norm": 0.027995990589261055, | |
| "learning_rate": 3.552896901772525e-06, | |
| "loss": 0.7845, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.4613718411552346, | |
| "grad_norm": 0.026287071406841278, | |
| "learning_rate": 3.543129708130139e-06, | |
| "loss": 0.7979, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.4625270758122744, | |
| "grad_norm": 0.026646843180060387, | |
| "learning_rate": 3.533343199805315e-06, | |
| "loss": 0.7869, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.4636823104693142, | |
| "grad_norm": 0.025323543697595596, | |
| "learning_rate": 3.52353755804935e-06, | |
| "loss": 0.7765, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.4648375451263538, | |
| "grad_norm": 0.025138625875115395, | |
| "learning_rate": 3.5137129644679035e-06, | |
| "loss": 0.7606, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.4659927797833934, | |
| "grad_norm": 0.026612093672156334, | |
| "learning_rate": 3.5038696010176316e-06, | |
| "loss": 0.7624, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.4671480144404332, | |
| "grad_norm": 0.025180159136652946, | |
| "learning_rate": 3.4940076500028193e-06, | |
| "loss": 0.7704, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.468303249097473, | |
| "grad_norm": 0.0247277170419693, | |
| "learning_rate": 3.484127294072003e-06, | |
| "loss": 0.7799, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.4694584837545126, | |
| "grad_norm": 0.025542214512825012, | |
| "learning_rate": 3.474228716214588e-06, | |
| "loss": 0.7859, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.4706137184115524, | |
| "grad_norm": 0.02490636333823204, | |
| "learning_rate": 3.464312099757457e-06, | |
| "loss": 0.7553, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.471768953068592, | |
| "grad_norm": 0.027338897809386253, | |
| "learning_rate": 3.4543776283615815e-06, | |
| "loss": 0.7869, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.4729241877256318, | |
| "grad_norm": 0.0249380711466074, | |
| "learning_rate": 3.444425486018611e-06, | |
| "loss": 0.7734, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.4740794223826714, | |
| "grad_norm": 0.02633252553641796, | |
| "learning_rate": 3.434455857047475e-06, | |
| "loss": 0.7714, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.4752346570397112, | |
| "grad_norm": 0.026209495961666107, | |
| "learning_rate": 3.424468926090961e-06, | |
| "loss": 0.7912, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.4763898916967508, | |
| "grad_norm": 0.02743818424642086, | |
| "learning_rate": 3.4144648781122987e-06, | |
| "loss": 0.7725, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.4775451263537906, | |
| "grad_norm": 0.02479531429708004, | |
| "learning_rate": 3.404443898391737e-06, | |
| "loss": 0.7548, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.4787003610108302, | |
| "grad_norm": 0.026155853644013405, | |
| "learning_rate": 3.3944061725231055e-06, | |
| "loss": 0.7882, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.47985559566787, | |
| "grad_norm": 0.02570260874927044, | |
| "learning_rate": 3.3843518864103857e-06, | |
| "loss": 0.7617, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.4810108303249097, | |
| "grad_norm": 0.026218149811029434, | |
| "learning_rate": 3.374281226264259e-06, | |
| "loss": 0.7798, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.4821660649819495, | |
| "grad_norm": 0.025268124416470528, | |
| "learning_rate": 3.364194378598666e-06, | |
| "loss": 0.7647, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.4833212996389893, | |
| "grad_norm": 0.028134865686297417, | |
| "learning_rate": 3.3540915302273476e-06, | |
| "loss": 0.777, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.4844765342960289, | |
| "grad_norm": 0.026391401886940002, | |
| "learning_rate": 3.343972868260385e-06, | |
| "loss": 0.7704, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.4856317689530685, | |
| "grad_norm": 0.025334736332297325, | |
| "learning_rate": 3.333838580100737e-06, | |
| "loss": 0.7465, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.4867870036101083, | |
| "grad_norm": 0.025972386822104454, | |
| "learning_rate": 3.32368885344077e-06, | |
| "loss": 0.798, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.487942238267148, | |
| "grad_norm": 0.02676592580974102, | |
| "learning_rate": 3.3135238762587723e-06, | |
| "loss": 0.7946, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.4890974729241877, | |
| "grad_norm": 0.026947803795337677, | |
| "learning_rate": 3.3033438368154886e-06, | |
| "loss": 0.7762, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.4902527075812273, | |
| "grad_norm": 0.02568644843995571, | |
| "learning_rate": 3.2931489236506183e-06, | |
| "loss": 0.7977, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.4914079422382671, | |
| "grad_norm": 0.02489590272307396, | |
| "learning_rate": 3.282939325579333e-06, | |
| "loss": 0.7634, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.492563176895307, | |
| "grad_norm": 0.02679099142551422, | |
| "learning_rate": 3.272715231688775e-06, | |
| "loss": 0.7917, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.4937184115523465, | |
| "grad_norm": 0.025316089391708374, | |
| "learning_rate": 3.2624768313345594e-06, | |
| "loss": 0.7507, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.4948736462093863, | |
| "grad_norm": 0.024965766817331314, | |
| "learning_rate": 3.25222431413726e-06, | |
| "loss": 0.7751, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.496028880866426, | |
| "grad_norm": 0.026891566812992096, | |
| "learning_rate": 3.241957869978907e-06, | |
| "loss": 0.7697, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.4971841155234658, | |
| "grad_norm": 0.02724074199795723, | |
| "learning_rate": 3.2316776889994627e-06, | |
| "loss": 0.8087, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.4983393501805053, | |
| "grad_norm": 0.025544699281454086, | |
| "learning_rate": 3.221383961593303e-06, | |
| "loss": 0.7511, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.4994945848375452, | |
| "grad_norm": 0.02615639939904213, | |
| "learning_rate": 3.2110768784056884e-06, | |
| "loss": 0.7741, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.5006498194945848, | |
| "grad_norm": 0.026993228122591972, | |
| "learning_rate": 3.2007566303292416e-06, | |
| "loss": 0.7631, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 1.5018050541516246, | |
| "grad_norm": 0.02476736344397068, | |
| "learning_rate": 3.1904234085003976e-06, | |
| "loss": 0.7574, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5029602888086644, | |
| "grad_norm": 0.025930294767022133, | |
| "learning_rate": 3.180077404295881e-06, | |
| "loss": 0.8047, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 1.504115523465704, | |
| "grad_norm": 0.025386760011315346, | |
| "learning_rate": 3.1697188093291454e-06, | |
| "loss": 0.7663, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.5052707581227436, | |
| "grad_norm": 0.025852493941783905, | |
| "learning_rate": 3.1593478154468364e-06, | |
| "loss": 0.8022, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 1.5064259927797834, | |
| "grad_norm": 0.024930665269494057, | |
| "learning_rate": 3.148964614725232e-06, | |
| "loss": 0.779, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.5075812274368232, | |
| "grad_norm": 0.025383364409208298, | |
| "learning_rate": 3.138569399466689e-06, | |
| "loss": 0.7833, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.5087364620938628, | |
| "grad_norm": 0.026384184136986732, | |
| "learning_rate": 3.128162362196077e-06, | |
| "loss": 0.7753, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.5098916967509024, | |
| "grad_norm": 0.02558097243309021, | |
| "learning_rate": 3.117743695657219e-06, | |
| "loss": 0.7648, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 1.5110469314079422, | |
| "grad_norm": 0.02592851221561432, | |
| "learning_rate": 3.107313592809315e-06, | |
| "loss": 0.7819, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.512202166064982, | |
| "grad_norm": 0.02552459016442299, | |
| "learning_rate": 3.0968722468233743e-06, | |
| "loss": 0.7831, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 1.5133574007220216, | |
| "grad_norm": 0.026385486125946045, | |
| "learning_rate": 3.086419851078632e-06, | |
| "loss": 0.7874, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.5145126353790612, | |
| "grad_norm": 0.025948703289031982, | |
| "learning_rate": 3.0759565991589733e-06, | |
| "loss": 0.7687, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 1.515667870036101, | |
| "grad_norm": 0.02731943503022194, | |
| "learning_rate": 3.0654826848493436e-06, | |
| "loss": 0.7722, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.5168231046931409, | |
| "grad_norm": 0.02646474353969097, | |
| "learning_rate": 3.0549983021321634e-06, | |
| "loss": 0.7731, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 1.5179783393501805, | |
| "grad_norm": 0.0249773059040308, | |
| "learning_rate": 3.044503645183731e-06, | |
| "loss": 0.7773, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.5191335740072203, | |
| "grad_norm": 0.0276559516787529, | |
| "learning_rate": 3.0339989083706306e-06, | |
| "loss": 0.7911, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.5202888086642599, | |
| "grad_norm": 0.026679757982492447, | |
| "learning_rate": 3.0234842862461307e-06, | |
| "loss": 0.7383, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.5214440433212997, | |
| "grad_norm": 0.02656986005604267, | |
| "learning_rate": 3.0129599735465816e-06, | |
| "loss": 0.766, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 1.5225992779783395, | |
| "grad_norm": 0.024834152311086655, | |
| "learning_rate": 3.0024261651878056e-06, | |
| "loss": 0.7661, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.523754512635379, | |
| "grad_norm": 0.026497021317481995, | |
| "learning_rate": 2.9918830562614927e-06, | |
| "loss": 0.785, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 1.5249097472924187, | |
| "grad_norm": 0.02679789625108242, | |
| "learning_rate": 2.9813308420315825e-06, | |
| "loss": 0.7814, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.5260649819494585, | |
| "grad_norm": 0.02663526125252247, | |
| "learning_rate": 2.9707697179306505e-06, | |
| "loss": 0.7875, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 1.5272202166064983, | |
| "grad_norm": 0.02751866728067398, | |
| "learning_rate": 2.9601998795562865e-06, | |
| "loss": 0.7683, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.528375451263538, | |
| "grad_norm": 0.026420842856168747, | |
| "learning_rate": 2.9496215226674747e-06, | |
| "loss": 0.7671, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 1.5295306859205775, | |
| "grad_norm": 0.026854515075683594, | |
| "learning_rate": 2.9390348431809656e-06, | |
| "loss": 0.7705, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.5306859205776173, | |
| "grad_norm": 0.025556296110153198, | |
| "learning_rate": 2.9284400371676486e-06, | |
| "loss": 0.7771, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.5318411552346571, | |
| "grad_norm": 0.025579258799552917, | |
| "learning_rate": 2.917837300848923e-06, | |
| "loss": 0.7683, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.5329963898916967, | |
| "grad_norm": 0.027229176834225655, | |
| "learning_rate": 2.9072268305930578e-06, | |
| "loss": 0.789, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 1.5341516245487363, | |
| "grad_norm": 0.025784116238355637, | |
| "learning_rate": 2.896608822911561e-06, | |
| "loss": 0.7699, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.5353068592057761, | |
| "grad_norm": 0.02648346498608589, | |
| "learning_rate": 2.8859834744555377e-06, | |
| "loss": 0.773, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 1.536462093862816, | |
| "grad_norm": 0.025939345359802246, | |
| "learning_rate": 2.8753509820120464e-06, | |
| "loss": 0.791, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.5376173285198556, | |
| "grad_norm": 0.025402942672371864, | |
| "learning_rate": 2.864711542500457e-06, | |
| "loss": 0.7643, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 1.5387725631768951, | |
| "grad_norm": 0.026020778343081474, | |
| "learning_rate": 2.8540653529688033e-06, | |
| "loss": 0.7813, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.539927797833935, | |
| "grad_norm": 0.026241673156619072, | |
| "learning_rate": 2.8434126105901333e-06, | |
| "loss": 0.7905, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 1.5410830324909748, | |
| "grad_norm": 0.02648642659187317, | |
| "learning_rate": 2.8327535126588563e-06, | |
| "loss": 0.771, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.5422382671480146, | |
| "grad_norm": 0.02610144577920437, | |
| "learning_rate": 2.8220882565870893e-06, | |
| "loss": 0.792, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.5433935018050542, | |
| "grad_norm": 0.025254933163523674, | |
| "learning_rate": 2.811417039901001e-06, | |
| "loss": 0.7674, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.5445487364620938, | |
| "grad_norm": 0.025538668036460876, | |
| "learning_rate": 2.800740060237157e-06, | |
| "loss": 0.7745, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 1.5457039711191336, | |
| "grad_norm": 0.025685755535960197, | |
| "learning_rate": 2.7900575153388525e-06, | |
| "loss": 0.7645, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.5468592057761734, | |
| "grad_norm": 0.026221225038170815, | |
| "learning_rate": 2.779369603052456e-06, | |
| "loss": 0.8149, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 1.548014440433213, | |
| "grad_norm": 0.02525060623884201, | |
| "learning_rate": 2.768676521323742e-06, | |
| "loss": 0.7552, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.5491696750902526, | |
| "grad_norm": 0.026158245280385017, | |
| "learning_rate": 2.757978468194226e-06, | |
| "loss": 0.7712, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 1.5503249097472924, | |
| "grad_norm": 0.02666587382555008, | |
| "learning_rate": 2.747275641797496e-06, | |
| "loss": 0.7511, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.5514801444043322, | |
| "grad_norm": 0.026437923312187195, | |
| "learning_rate": 2.736568240355544e-06, | |
| "loss": 0.7659, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 1.5526353790613718, | |
| "grad_norm": 0.0248862411826849, | |
| "learning_rate": 2.725856462175095e-06, | |
| "loss": 0.7547, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.5537906137184114, | |
| "grad_norm": 0.026547906920313835, | |
| "learning_rate": 2.715140505643932e-06, | |
| "loss": 0.7996, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.5549458483754512, | |
| "grad_norm": 0.02599526010453701, | |
| "learning_rate": 2.704420569227224e-06, | |
| "loss": 0.7641, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.556101083032491, | |
| "grad_norm": 0.02453439123928547, | |
| "learning_rate": 2.6936968514638495e-06, | |
| "loss": 0.7466, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 1.5572563176895307, | |
| "grad_norm": 0.027833128347992897, | |
| "learning_rate": 2.68296955096272e-06, | |
| "loss": 0.7826, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.5584115523465703, | |
| "grad_norm": 0.026869479566812515, | |
| "learning_rate": 2.672238866399099e-06, | |
| "loss": 0.789, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 1.55956678700361, | |
| "grad_norm": 0.025666510686278343, | |
| "learning_rate": 2.6615049965109296e-06, | |
| "loss": 0.784, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.5607220216606499, | |
| "grad_norm": 0.02734527923166752, | |
| "learning_rate": 2.6507681400951414e-06, | |
| "loss": 0.7934, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 1.5618772563176895, | |
| "grad_norm": 0.025378704071044922, | |
| "learning_rate": 2.640028496003984e-06, | |
| "loss": 0.7554, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.5630324909747293, | |
| "grad_norm": 0.02528882399201393, | |
| "learning_rate": 2.629286263141329e-06, | |
| "loss": 0.7652, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 1.5641877256317689, | |
| "grad_norm": 0.0248930174857378, | |
| "learning_rate": 2.6185416404589985e-06, | |
| "loss": 0.7515, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.5653429602888087, | |
| "grad_norm": 0.025928398594260216, | |
| "learning_rate": 2.607794826953075e-06, | |
| "loss": 0.7839, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.5664981949458485, | |
| "grad_norm": 0.025345591828227043, | |
| "learning_rate": 2.5970460216602133e-06, | |
| "loss": 0.773, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.5676534296028881, | |
| "grad_norm": 0.02638527937233448, | |
| "learning_rate": 2.5862954236539587e-06, | |
| "loss": 0.7641, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 1.5688086642599277, | |
| "grad_norm": 0.025557557120919228, | |
| "learning_rate": 2.5755432320410594e-06, | |
| "loss": 0.7869, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.5699638989169675, | |
| "grad_norm": 0.025332162156701088, | |
| "learning_rate": 2.5647896459577757e-06, | |
| "loss": 0.7873, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 1.5711191335740073, | |
| "grad_norm": 0.025012118741869926, | |
| "learning_rate": 2.5540348645661965e-06, | |
| "loss": 0.7805, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.572274368231047, | |
| "grad_norm": 0.02651703916490078, | |
| "learning_rate": 2.5432790870505487e-06, | |
| "loss": 0.791, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 1.5734296028880865, | |
| "grad_norm": 0.025666911154985428, | |
| "learning_rate": 2.5325225126135045e-06, | |
| "loss": 0.7814, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.5745848375451263, | |
| "grad_norm": 0.02593865618109703, | |
| "learning_rate": 2.521765340472499e-06, | |
| "loss": 0.8046, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 1.5757400722021662, | |
| "grad_norm": 0.025667283684015274, | |
| "learning_rate": 2.5110077698560364e-06, | |
| "loss": 0.757, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.5768953068592058, | |
| "grad_norm": 0.025065291672945023, | |
| "learning_rate": 2.5002500000000003e-06, | |
| "loss": 0.7729, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.5780505415162454, | |
| "grad_norm": 0.025896169245243073, | |
| "learning_rate": 2.4894922301439643e-06, | |
| "loss": 0.7825, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.5792057761732852, | |
| "grad_norm": 0.02581976167857647, | |
| "learning_rate": 2.478734659527501e-06, | |
| "loss": 0.7783, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 1.580361010830325, | |
| "grad_norm": 0.025588102638721466, | |
| "learning_rate": 2.467977487386497e-06, | |
| "loss": 0.7617, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.5815162454873646, | |
| "grad_norm": 0.02477039210498333, | |
| "learning_rate": 2.4572209129494524e-06, | |
| "loss": 0.7494, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 1.5826714801444042, | |
| "grad_norm": 0.027211980894207954, | |
| "learning_rate": 2.4464651354338037e-06, | |
| "loss": 0.8013, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.583826714801444, | |
| "grad_norm": 0.02574116736650467, | |
| "learning_rate": 2.435710354042225e-06, | |
| "loss": 0.7706, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 1.5849819494584838, | |
| "grad_norm": 0.025494717061519623, | |
| "learning_rate": 2.4249567679589417e-06, | |
| "loss": 0.8006, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.5861371841155236, | |
| "grad_norm": 0.024281838908791542, | |
| "learning_rate": 2.4142045763460415e-06, | |
| "loss": 0.7609, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 1.5872924187725632, | |
| "grad_norm": 0.025773445144295692, | |
| "learning_rate": 2.403453978339787e-06, | |
| "loss": 0.8048, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.5884476534296028, | |
| "grad_norm": 0.02534416690468788, | |
| "learning_rate": 2.392705173046925e-06, | |
| "loss": 0.7731, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.5896028880866426, | |
| "grad_norm": 0.02497626096010208, | |
| "learning_rate": 2.3819583595410013e-06, | |
| "loss": 0.7858, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.5907581227436824, | |
| "grad_norm": 0.02502652071416378, | |
| "learning_rate": 2.3712137368586717e-06, | |
| "loss": 0.7438, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 1.591913357400722, | |
| "grad_norm": 0.024966862052679062, | |
| "learning_rate": 2.3604715039960173e-06, | |
| "loss": 0.7835, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.5930685920577616, | |
| "grad_norm": 0.025360535830259323, | |
| "learning_rate": 2.3497318599048592e-06, | |
| "loss": 0.7716, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 1.5942238267148015, | |
| "grad_norm": 0.026140306144952774, | |
| "learning_rate": 2.338995003489072e-06, | |
| "loss": 0.7556, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.5953790613718413, | |
| "grad_norm": 0.02430606074631214, | |
| "learning_rate": 2.328261133600902e-06, | |
| "loss": 0.7708, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 1.5965342960288809, | |
| "grad_norm": 0.02544374018907547, | |
| "learning_rate": 2.317530449037281e-06, | |
| "loss": 0.7837, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.5976895306859205, | |
| "grad_norm": 0.025840749964118004, | |
| "learning_rate": 2.3068031485361516e-06, | |
| "loss": 0.7575, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 1.5988447653429603, | |
| "grad_norm": 0.027307111769914627, | |
| "learning_rate": 2.296079430772777e-06, | |
| "loss": 0.8002, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.026851478964090347, | |
| "learning_rate": 2.285359494356068e-06, | |
| "loss": 0.8076, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.6011552346570397, | |
| "grad_norm": 0.025269558653235435, | |
| "learning_rate": 2.274643537824905e-06, | |
| "loss": 0.7601, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.6023104693140793, | |
| "grad_norm": 0.024502936750650406, | |
| "learning_rate": 2.263931759644456e-06, | |
| "loss": 0.7777, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 1.603465703971119, | |
| "grad_norm": 0.025695212185382843, | |
| "learning_rate": 2.2532243582025048e-06, | |
| "loss": 0.7779, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.604620938628159, | |
| "grad_norm": 0.025510141626000404, | |
| "learning_rate": 2.242521531805775e-06, | |
| "loss": 0.7623, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 1.6057761732851985, | |
| "grad_norm": 0.025397466495633125, | |
| "learning_rate": 2.2318234786762586e-06, | |
| "loss": 0.769, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.6069314079422383, | |
| "grad_norm": 0.02594076469540596, | |
| "learning_rate": 2.2211303969475444e-06, | |
| "loss": 0.8051, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 1.608086642599278, | |
| "grad_norm": 0.024837691336870193, | |
| "learning_rate": 2.210442484661148e-06, | |
| "loss": 0.8117, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.6092418772563177, | |
| "grad_norm": 0.026649747043848038, | |
| "learning_rate": 2.199759939762843e-06, | |
| "loss": 0.7768, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 1.6103971119133575, | |
| "grad_norm": 0.026813820004463196, | |
| "learning_rate": 2.1890829600989995e-06, | |
| "loss": 0.7575, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.6115523465703971, | |
| "grad_norm": 0.026133766397833824, | |
| "learning_rate": 2.1784117434129113e-06, | |
| "loss": 0.7599, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 1.6127075812274367, | |
| "grad_norm": 0.026183927431702614, | |
| "learning_rate": 2.167746487341144e-06, | |
| "loss": 0.7661, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.6138628158844766, | |
| "grad_norm": 0.02729174494743347, | |
| "learning_rate": 2.157087389409867e-06, | |
| "loss": 0.7698, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 1.6150180505415164, | |
| "grad_norm": 0.026295259594917297, | |
| "learning_rate": 2.1464346470311965e-06, | |
| "loss": 0.7695, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.616173285198556, | |
| "grad_norm": 0.026081614196300507, | |
| "learning_rate": 2.135788457499544e-06, | |
| "loss": 0.7622, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 1.6173285198555956, | |
| "grad_norm": 0.025280749425292015, | |
| "learning_rate": 2.1251490179879547e-06, | |
| "loss": 0.7639, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.6184837545126354, | |
| "grad_norm": 0.02538752183318138, | |
| "learning_rate": 2.1145165255444643e-06, | |
| "loss": 0.7893, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 1.6196389891696752, | |
| "grad_norm": 0.026337897405028343, | |
| "learning_rate": 2.1038911770884395e-06, | |
| "loss": 0.7936, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.6207942238267148, | |
| "grad_norm": 0.027165520936250687, | |
| "learning_rate": 2.0932731694069438e-06, | |
| "loss": 0.7506, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 1.6219494584837544, | |
| "grad_norm": 0.02547120675444603, | |
| "learning_rate": 2.0826626991510775e-06, | |
| "loss": 0.7623, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.6231046931407942, | |
| "grad_norm": 0.023889468982815742, | |
| "learning_rate": 2.072059962832352e-06, | |
| "loss": 0.7444, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 1.624259927797834, | |
| "grad_norm": 0.025629183277487755, | |
| "learning_rate": 2.0614651568190354e-06, | |
| "loss": 0.7869, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.6254151624548736, | |
| "grad_norm": 0.02670186199247837, | |
| "learning_rate": 2.050878477332526e-06, | |
| "loss": 0.8075, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 1.6265703971119132, | |
| "grad_norm": 0.025403102859854698, | |
| "learning_rate": 2.040300120443714e-06, | |
| "loss": 0.7647, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.627725631768953, | |
| "grad_norm": 0.025784939527511597, | |
| "learning_rate": 2.02973028206935e-06, | |
| "loss": 0.7594, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 1.6288808664259928, | |
| "grad_norm": 0.024970732629299164, | |
| "learning_rate": 2.019169157968418e-06, | |
| "loss": 0.7664, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.6300361010830327, | |
| "grad_norm": 0.025724420323967934, | |
| "learning_rate": 2.008616943738508e-06, | |
| "loss": 0.7637, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 1.6311913357400722, | |
| "grad_norm": 0.02508534863591194, | |
| "learning_rate": 1.9980738348121955e-06, | |
| "loss": 0.7674, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.6323465703971118, | |
| "grad_norm": 0.024077199399471283, | |
| "learning_rate": 1.9875400264534194e-06, | |
| "loss": 0.7703, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 1.6335018050541517, | |
| "grad_norm": 0.02483062632381916, | |
| "learning_rate": 1.97701571375387e-06, | |
| "loss": 0.7825, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.6346570397111915, | |
| "grad_norm": 0.025150980800390244, | |
| "learning_rate": 1.9665010916293704e-06, | |
| "loss": 0.7875, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 1.635812274368231, | |
| "grad_norm": 0.02459767460823059, | |
| "learning_rate": 1.95599635481627e-06, | |
| "loss": 0.7745, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.6369675090252707, | |
| "grad_norm": 0.024894608184695244, | |
| "learning_rate": 1.9455016978678377e-06, | |
| "loss": 0.7825, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 1.6381227436823105, | |
| "grad_norm": 0.02483339235186577, | |
| "learning_rate": 1.935017315150656e-06, | |
| "loss": 0.7654, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.6392779783393503, | |
| "grad_norm": 0.025194387882947922, | |
| "learning_rate": 1.9245434008410273e-06, | |
| "loss": 0.7513, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 1.6404332129963899, | |
| "grad_norm": 0.025890490040183067, | |
| "learning_rate": 1.914080148921368e-06, | |
| "loss": 0.7756, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.6415884476534295, | |
| "grad_norm": 0.025699496269226074, | |
| "learning_rate": 1.903627753176627e-06, | |
| "loss": 0.7702, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 1.6427436823104693, | |
| "grad_norm": 0.02435440570116043, | |
| "learning_rate": 1.893186407190685e-06, | |
| "loss": 0.7727, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.6438989169675091, | |
| "grad_norm": 0.024738416075706482, | |
| "learning_rate": 1.8827563043427818e-06, | |
| "loss": 0.7856, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 1.6450541516245487, | |
| "grad_norm": 0.025477735325694084, | |
| "learning_rate": 1.8723376378039229e-06, | |
| "loss": 0.775, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.6462093862815883, | |
| "grad_norm": 0.02498653531074524, | |
| "learning_rate": 1.8619306005333118e-06, | |
| "loss": 0.7464, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 1.6473646209386281, | |
| "grad_norm": 0.025513045489788055, | |
| "learning_rate": 1.851535385274768e-06, | |
| "loss": 0.7944, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.648519855595668, | |
| "grad_norm": 0.024236127734184265, | |
| "learning_rate": 1.8411521845531638e-06, | |
| "loss": 0.7646, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 1.6496750902527075, | |
| "grad_norm": 0.025221243500709534, | |
| "learning_rate": 1.8307811906708552e-06, | |
| "loss": 0.7802, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.6508303249097471, | |
| "grad_norm": 0.024581240490078926, | |
| "learning_rate": 1.8204225957041197e-06, | |
| "loss": 0.7686, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 1.651985559566787, | |
| "grad_norm": 0.025168737396597862, | |
| "learning_rate": 1.8100765914996026e-06, | |
| "loss": 0.79, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.6531407942238268, | |
| "grad_norm": 0.024541551247239113, | |
| "learning_rate": 1.7997433696707597e-06, | |
| "loss": 0.766, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 1.6542960288808666, | |
| "grad_norm": 0.02476644702255726, | |
| "learning_rate": 1.789423121594312e-06, | |
| "loss": 0.7718, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.6554512635379062, | |
| "grad_norm": 0.027230584993958473, | |
| "learning_rate": 1.779116038406698e-06, | |
| "loss": 0.7813, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 1.6566064981949458, | |
| "grad_norm": 0.025931481271982193, | |
| "learning_rate": 1.768822311000538e-06, | |
| "loss": 0.7772, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.6577617328519856, | |
| "grad_norm": 0.025082925334572792, | |
| "learning_rate": 1.7585421300210932e-06, | |
| "loss": 0.7958, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 1.6589169675090254, | |
| "grad_norm": 0.02672106772661209, | |
| "learning_rate": 1.7482756858627406e-06, | |
| "loss": 0.774, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.660072202166065, | |
| "grad_norm": 0.026696957647800446, | |
| "learning_rate": 1.7380231686654419e-06, | |
| "loss": 0.7763, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 1.6612274368231046, | |
| "grad_norm": 0.025145625695586205, | |
| "learning_rate": 1.7277847683112251e-06, | |
| "loss": 0.7602, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.6623826714801444, | |
| "grad_norm": 0.025679778307676315, | |
| "learning_rate": 1.7175606744206683e-06, | |
| "loss": 0.7772, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 1.6635379061371842, | |
| "grad_norm": 0.02643396146595478, | |
| "learning_rate": 1.707351076349382e-06, | |
| "loss": 0.7776, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.6646931407942238, | |
| "grad_norm": 0.025024591013789177, | |
| "learning_rate": 1.697156163184512e-06, | |
| "loss": 0.769, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 1.6658483754512634, | |
| "grad_norm": 0.02451016753911972, | |
| "learning_rate": 1.6869761237412275e-06, | |
| "loss": 0.7614, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.6670036101083032, | |
| "grad_norm": 0.02593870460987091, | |
| "learning_rate": 1.6768111465592312e-06, | |
| "loss": 0.7828, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 1.668158844765343, | |
| "grad_norm": 0.024905387312173843, | |
| "learning_rate": 1.6666614198992625e-06, | |
| "loss": 0.7847, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.6693140794223826, | |
| "grad_norm": 0.025363383814692497, | |
| "learning_rate": 1.6565271317396156e-06, | |
| "loss": 0.7757, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 1.6704693140794222, | |
| "grad_norm": 0.02425825409591198, | |
| "learning_rate": 1.6464084697726532e-06, | |
| "loss": 0.756, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.671624548736462, | |
| "grad_norm": 0.025201529264450073, | |
| "learning_rate": 1.636305621401335e-06, | |
| "loss": 0.7701, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 1.6727797833935019, | |
| "grad_norm": 0.02498772367835045, | |
| "learning_rate": 1.626218773735742e-06, | |
| "loss": 0.8012, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.6739350180505417, | |
| "grad_norm": 0.02500982955098152, | |
| "learning_rate": 1.6161481135896154e-06, | |
| "loss": 0.7796, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 1.6750902527075813, | |
| "grad_norm": 0.025785304605960846, | |
| "learning_rate": 1.606093827476895e-06, | |
| "loss": 0.7584, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.6762454873646209, | |
| "grad_norm": 0.024516083300113678, | |
| "learning_rate": 1.5960561016082638e-06, | |
| "loss": 0.7812, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 1.6774007220216607, | |
| "grad_norm": 0.025357436388731003, | |
| "learning_rate": 1.5860351218877021e-06, | |
| "loss": 0.7642, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.6785559566787005, | |
| "grad_norm": 0.025423161685466766, | |
| "learning_rate": 1.5760310739090402e-06, | |
| "loss": 0.7838, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 1.67971119133574, | |
| "grad_norm": 0.025020305067300797, | |
| "learning_rate": 1.5660441429525258e-06, | |
| "loss": 0.7713, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.6808664259927797, | |
| "grad_norm": 0.02521519362926483, | |
| "learning_rate": 1.556074513981389e-06, | |
| "loss": 0.8022, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 1.6820216606498195, | |
| "grad_norm": 0.02515571005642414, | |
| "learning_rate": 1.5461223716384196e-06, | |
| "loss": 0.7794, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.6831768953068593, | |
| "grad_norm": 0.025751549750566483, | |
| "learning_rate": 1.5361879002425432e-06, | |
| "loss": 0.8067, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 1.684332129963899, | |
| "grad_norm": 0.024167675524950027, | |
| "learning_rate": 1.5262712837854139e-06, | |
| "loss": 0.7497, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.6854873646209385, | |
| "grad_norm": 0.025504915043711662, | |
| "learning_rate": 1.5163727059279972e-06, | |
| "loss": 0.7645, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 1.6866425992779783, | |
| "grad_norm": 0.026490593329072, | |
| "learning_rate": 1.506492349997181e-06, | |
| "loss": 0.7698, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.6877978339350181, | |
| "grad_norm": 0.024468280375003815, | |
| "learning_rate": 1.4966303989823688e-06, | |
| "loss": 0.7541, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 1.6889530685920577, | |
| "grad_norm": 0.02487323433160782, | |
| "learning_rate": 1.4867870355320972e-06, | |
| "loss": 0.7611, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.6901083032490973, | |
| "grad_norm": 0.024844063445925713, | |
| "learning_rate": 1.4769624419506507e-06, | |
| "loss": 0.7598, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 1.6912635379061371, | |
| "grad_norm": 0.024609016254544258, | |
| "learning_rate": 1.4671568001946851e-06, | |
| "loss": 0.7863, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.692418772563177, | |
| "grad_norm": 0.025440316647291183, | |
| "learning_rate": 1.4573702918698615e-06, | |
| "loss": 0.7955, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 1.6935740072202166, | |
| "grad_norm": 0.024768877774477005, | |
| "learning_rate": 1.4476030982274746e-06, | |
| "loss": 0.7666, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.6947292418772562, | |
| "grad_norm": 0.024406442418694496, | |
| "learning_rate": 1.4378554001611087e-06, | |
| "loss": 0.747, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 1.695884476534296, | |
| "grad_norm": 0.02518375776708126, | |
| "learning_rate": 1.4281273782032696e-06, | |
| "loss": 0.7914, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.6970397111913358, | |
| "grad_norm": 0.025701193138957024, | |
| "learning_rate": 1.41841921252206e-06, | |
| "loss": 0.7879, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 1.6981949458483756, | |
| "grad_norm": 0.025167284533381462, | |
| "learning_rate": 1.4087310829178295e-06, | |
| "loss": 0.7707, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.6993501805054152, | |
| "grad_norm": 0.024784136563539505, | |
| "learning_rate": 1.3990631688198505e-06, | |
| "loss": 0.7753, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 1.7005054151624548, | |
| "grad_norm": 0.024584993720054626, | |
| "learning_rate": 1.3894156492829932e-06, | |
| "loss": 0.7579, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.7016606498194946, | |
| "grad_norm": 0.025017768144607544, | |
| "learning_rate": 1.3797887029844083e-06, | |
| "loss": 0.7512, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 1.7028158844765344, | |
| "grad_norm": 0.02527061477303505, | |
| "learning_rate": 1.3701825082202227e-06, | |
| "loss": 0.772, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.703971119133574, | |
| "grad_norm": 0.02521699294447899, | |
| "learning_rate": 1.3605972429022308e-06, | |
| "loss": 0.766, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.7051263537906136, | |
| "grad_norm": 0.024668460711836815, | |
| "learning_rate": 1.3510330845546029e-06, | |
| "loss": 0.7638, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.7062815884476534, | |
| "grad_norm": 0.02491624839603901, | |
| "learning_rate": 1.3414902103105984e-06, | |
| "loss": 0.7572, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 1.7074368231046932, | |
| "grad_norm": 0.02490483969449997, | |
| "learning_rate": 1.3319687969092844e-06, | |
| "loss": 0.7763, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.7085920577617328, | |
| "grad_norm": 0.02502390742301941, | |
| "learning_rate": 1.3224690206922594e-06, | |
| "loss": 0.7859, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 1.7097472924187724, | |
| "grad_norm": 0.024547290056943893, | |
| "learning_rate": 1.3129910576003918e-06, | |
| "loss": 0.76, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.7109025270758123, | |
| "grad_norm": 0.024496039375662804, | |
| "learning_rate": 1.3035350831705581e-06, | |
| "loss": 0.7541, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 1.712057761732852, | |
| "grad_norm": 0.0256884153932333, | |
| "learning_rate": 1.2941012725323943e-06, | |
| "loss": 0.7794, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.7132129963898917, | |
| "grad_norm": 0.025446278974413872, | |
| "learning_rate": 1.2846898004050527e-06, | |
| "loss": 0.789, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 1.7143682310469313, | |
| "grad_norm": 0.02467747963964939, | |
| "learning_rate": 1.2753008410939593e-06, | |
| "loss": 0.7704, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.715523465703971, | |
| "grad_norm": 0.024466682225465775, | |
| "learning_rate": 1.2659345684875971e-06, | |
| "loss": 0.7751, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 1.716678700361011, | |
| "grad_norm": 0.02474912814795971, | |
| "learning_rate": 1.2565911560542752e-06, | |
| "loss": 0.7721, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.7178339350180507, | |
| "grad_norm": 0.024523675441741943, | |
| "learning_rate": 1.2472707768389226e-06, | |
| "loss": 0.7638, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 1.7189891696750903, | |
| "grad_norm": 0.02448326162993908, | |
| "learning_rate": 1.2379736034598793e-06, | |
| "loss": 0.7898, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.72014440433213, | |
| "grad_norm": 0.024665791541337967, | |
| "learning_rate": 1.2286998081057024e-06, | |
| "loss": 0.757, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 1.7212996389891697, | |
| "grad_norm": 0.024392489343881607, | |
| "learning_rate": 1.219449562531973e-06, | |
| "loss": 0.7756, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.7224548736462095, | |
| "grad_norm": 0.024781666696071625, | |
| "learning_rate": 1.2102230380581221e-06, | |
| "loss": 0.7851, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 1.7236101083032491, | |
| "grad_norm": 0.024531826376914978, | |
| "learning_rate": 1.2010204055642504e-06, | |
| "loss": 0.7786, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.7247653429602887, | |
| "grad_norm": 0.024737264961004257, | |
| "learning_rate": 1.1918418354879655e-06, | |
| "loss": 0.7918, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 1.7259205776173285, | |
| "grad_norm": 0.024697288870811462, | |
| "learning_rate": 1.1826874978212304e-06, | |
| "loss": 0.7797, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.7270758122743683, | |
| "grad_norm": 0.023727286607027054, | |
| "learning_rate": 1.1735575621072076e-06, | |
| "loss": 0.7622, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 1.728231046931408, | |
| "grad_norm": 0.024227799847722054, | |
| "learning_rate": 1.1644521974371236e-06, | |
| "loss": 0.7538, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.7293862815884475, | |
| "grad_norm": 0.02443331852555275, | |
| "learning_rate": 1.1553715724471356e-06, | |
| "loss": 0.7567, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 1.7305415162454874, | |
| "grad_norm": 0.02441231720149517, | |
| "learning_rate": 1.146315855315211e-06, | |
| "loss": 0.763, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.7316967509025272, | |
| "grad_norm": 0.02491583675146103, | |
| "learning_rate": 1.1372852137580082e-06, | |
| "loss": 0.7519, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 1.7328519855595668, | |
| "grad_norm": 0.024958504363894463, | |
| "learning_rate": 1.1282798150277738e-06, | |
| "loss": 0.7704, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.7340072202166064, | |
| "grad_norm": 0.02489922009408474, | |
| "learning_rate": 1.1192998259092428e-06, | |
| "loss": 0.8001, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 1.7351624548736462, | |
| "grad_norm": 0.024233028292655945, | |
| "learning_rate": 1.1103454127165526e-06, | |
| "loss": 0.7725, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.736317689530686, | |
| "grad_norm": 0.02405642904341221, | |
| "learning_rate": 1.1014167412901609e-06, | |
| "loss": 0.7227, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 1.7374729241877256, | |
| "grad_norm": 0.025087906047701836, | |
| "learning_rate": 1.092513976993772e-06, | |
| "loss": 0.7852, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.7386281588447652, | |
| "grad_norm": 0.025061985477805138, | |
| "learning_rate": 1.083637284711279e-06, | |
| "loss": 0.7648, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 1.739783393501805, | |
| "grad_norm": 0.024631349369883537, | |
| "learning_rate": 1.074786828843705e-06, | |
| "loss": 0.741, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.7409386281588448, | |
| "grad_norm": 0.024879056960344315, | |
| "learning_rate": 1.0659627733061643e-06, | |
| "loss": 0.7355, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 1.7420938628158846, | |
| "grad_norm": 0.024683699011802673, | |
| "learning_rate": 1.0571652815248199e-06, | |
| "loss": 0.8, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.7432490974729242, | |
| "grad_norm": 0.02492067962884903, | |
| "learning_rate": 1.0483945164338617e-06, | |
| "loss": 0.7701, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 1.7444043321299638, | |
| "grad_norm": 0.02489222027361393, | |
| "learning_rate": 1.039650640472486e-06, | |
| "loss": 0.7831, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.7455595667870036, | |
| "grad_norm": 0.024434035643935204, | |
| "learning_rate": 1.0309338155818906e-06, | |
| "loss": 0.7896, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 1.7467148014440435, | |
| "grad_norm": 0.025025788694620132, | |
| "learning_rate": 1.0222442032022705e-06, | |
| "loss": 0.7806, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.747870036101083, | |
| "grad_norm": 0.024875549599528313, | |
| "learning_rate": 1.0135819642698318e-06, | |
| "loss": 0.7844, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 1.7490252707581226, | |
| "grad_norm": 0.024595865979790688, | |
| "learning_rate": 1.0049472592138105e-06, | |
| "loss": 0.7766, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.7501805054151625, | |
| "grad_norm": 0.02502664178609848, | |
| "learning_rate": 9.963402479535004e-07, | |
| "loss": 0.7813, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 1.7513357400722023, | |
| "grad_norm": 0.024631381034851074, | |
| "learning_rate": 9.877610898952902e-07, | |
| "loss": 0.79, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.7524909747292419, | |
| "grad_norm": 0.023990454152226448, | |
| "learning_rate": 9.792099439297134e-07, | |
| "loss": 0.7875, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 1.7536462093862815, | |
| "grad_norm": 0.0241263285279274, | |
| "learning_rate": 9.706869684285066e-07, | |
| "loss": 0.7605, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.7548014440433213, | |
| "grad_norm": 0.02398371510207653, | |
| "learning_rate": 9.621923212416713e-07, | |
| "loss": 0.7778, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 1.755956678700361, | |
| "grad_norm": 0.025420811027288437, | |
| "learning_rate": 9.53726159694558e-07, | |
| "loss": 0.7881, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.7571119133574007, | |
| "grad_norm": 0.024340439587831497, | |
| "learning_rate": 9.452886405849418e-07, | |
| "loss": 0.7725, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 1.7582671480144403, | |
| "grad_norm": 0.02469663880765438, | |
| "learning_rate": 9.36879920180131e-07, | |
| "loss": 0.771, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.75942238267148, | |
| "grad_norm": 0.024449417367577553, | |
| "learning_rate": 9.285001542140625e-07, | |
| "loss": 0.7624, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 1.76057761732852, | |
| "grad_norm": 0.024617070332169533, | |
| "learning_rate": 9.201494978844251e-07, | |
| "loss": 0.7763, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.7617328519855595, | |
| "grad_norm": 0.02444697730243206, | |
| "learning_rate": 9.118281058497792e-07, | |
| "loss": 0.7634, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 1.7628880866425993, | |
| "grad_norm": 0.02412438951432705, | |
| "learning_rate": 9.03536132226695e-07, | |
| "loss": 0.7501, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.764043321299639, | |
| "grad_norm": 0.024980876594781876, | |
| "learning_rate": 8.952737305869002e-07, | |
| "loss": 0.802, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 1.7651985559566787, | |
| "grad_norm": 0.02424163930118084, | |
| "learning_rate": 8.870410539544322e-07, | |
| "loss": 0.7846, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.7663537906137186, | |
| "grad_norm": 0.025128301233053207, | |
| "learning_rate": 8.788382548028056e-07, | |
| "loss": 0.7551, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 1.7675090252707581, | |
| "grad_norm": 0.02520165592432022, | |
| "learning_rate": 8.706654850521883e-07, | |
| "loss": 0.7736, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.7686642599277977, | |
| "grad_norm": 0.02502501755952835, | |
| "learning_rate": 8.625228960665893e-07, | |
| "loss": 0.7632, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 1.7698194945848376, | |
| "grad_norm": 0.02456534653902054, | |
| "learning_rate": 8.544106386510526e-07, | |
| "loss": 0.7514, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.7709747292418774, | |
| "grad_norm": 0.02477426454424858, | |
| "learning_rate": 8.463288630488653e-07, | |
| "loss": 0.7741, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 1.772129963898917, | |
| "grad_norm": 0.02434214949607849, | |
| "learning_rate": 8.382777189387751e-07, | |
| "loss": 0.7721, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.7732851985559566, | |
| "grad_norm": 0.023917306214571, | |
| "learning_rate": 8.302573554322192e-07, | |
| "loss": 0.7507, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 1.7744404332129964, | |
| "grad_norm": 0.024669747799634933, | |
| "learning_rate": 8.222679210705636e-07, | |
| "loss": 0.7993, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.7755956678700362, | |
| "grad_norm": 0.024916965514421463, | |
| "learning_rate": 8.143095638223446e-07, | |
| "loss": 0.7536, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 1.7767509025270758, | |
| "grad_norm": 0.02413412742316723, | |
| "learning_rate": 8.063824310805396e-07, | |
| "loss": 0.7552, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.7779061371841154, | |
| "grad_norm": 0.025234825909137726, | |
| "learning_rate": 7.984866696598284e-07, | |
| "loss": 0.7845, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 1.7790613718411552, | |
| "grad_norm": 0.024565840139985085, | |
| "learning_rate": 7.906224257938796e-07, | |
| "loss": 0.7674, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.780216606498195, | |
| "grad_norm": 0.024029076099395752, | |
| "learning_rate": 7.827898451326386e-07, | |
| "loss": 0.7637, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 1.7813718411552346, | |
| "grad_norm": 0.0241931714117527, | |
| "learning_rate": 7.749890727396312e-07, | |
| "loss": 0.7754, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.7825270758122742, | |
| "grad_norm": 0.0242807324975729, | |
| "learning_rate": 7.672202530892778e-07, | |
| "loss": 0.7751, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 1.783682310469314, | |
| "grad_norm": 0.024539347738027573, | |
| "learning_rate": 7.594835300642188e-07, | |
| "loss": 0.7955, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.7848375451263538, | |
| "grad_norm": 0.024527210742235184, | |
| "learning_rate": 7.517790469526456e-07, | |
| "loss": 0.7713, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 1.7859927797833937, | |
| "grad_norm": 0.024129964411258698, | |
| "learning_rate": 7.441069464456497e-07, | |
| "loss": 0.7888, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.7871480144404333, | |
| "grad_norm": 0.024345900863409042, | |
| "learning_rate": 7.364673706345822e-07, | |
| "loss": 0.7548, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 1.7883032490974728, | |
| "grad_norm": 0.024231646209955215, | |
| "learning_rate": 7.288604610084172e-07, | |
| "loss": 0.7645, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.7894584837545127, | |
| "grad_norm": 0.023612959310412407, | |
| "learning_rate": 7.212863584511341e-07, | |
| "loss": 0.7565, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 1.7906137184115525, | |
| "grad_norm": 0.02500954270362854, | |
| "learning_rate": 7.137452032391084e-07, | |
| "loss": 0.7646, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.791768953068592, | |
| "grad_norm": 0.024092212319374084, | |
| "learning_rate": 7.062371350385143e-07, | |
| "loss": 0.7667, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 1.7929241877256317, | |
| "grad_norm": 0.023889616131782532, | |
| "learning_rate": 6.987622929027362e-07, | |
| "loss": 0.7848, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.7940794223826715, | |
| "grad_norm": 0.024128958582878113, | |
| "learning_rate": 6.913208152697935e-07, | |
| "loss": 0.7667, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 1.7952346570397113, | |
| "grad_norm": 0.024358974769711494, | |
| "learning_rate": 6.839128399597784e-07, | |
| "loss": 0.7742, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.796389891696751, | |
| "grad_norm": 0.024110984057188034, | |
| "learning_rate": 6.765385041723024e-07, | |
| "loss": 0.7783, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 1.7975451263537905, | |
| "grad_norm": 0.02420348860323429, | |
| "learning_rate": 6.691979444839561e-07, | |
| "loss": 0.7737, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.7987003610108303, | |
| "grad_norm": 0.024441605433821678, | |
| "learning_rate": 6.618912968457771e-07, | |
| "loss": 0.7767, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 1.7998555956678701, | |
| "grad_norm": 0.024723242968320847, | |
| "learning_rate": 6.546186965807347e-07, | |
| "loss": 0.7607, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.8010108303249097, | |
| "grad_norm": 0.025666577741503716, | |
| "learning_rate": 6.473802783812225e-07, | |
| "loss": 0.8028, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 1.8021660649819493, | |
| "grad_norm": 0.024900216609239578, | |
| "learning_rate": 6.401761763065661e-07, | |
| "loss": 0.7452, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.8033212996389891, | |
| "grad_norm": 0.024593856185674667, | |
| "learning_rate": 6.330065237805361e-07, | |
| "loss": 0.7879, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 1.804476534296029, | |
| "grad_norm": 0.024126332253217697, | |
| "learning_rate": 6.258714535888803e-07, | |
| "loss": 0.7687, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.8056317689530685, | |
| "grad_norm": 0.02449883334338665, | |
| "learning_rate": 6.187710978768624e-07, | |
| "loss": 0.7622, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 1.8067870036101084, | |
| "grad_norm": 0.024287080392241478, | |
| "learning_rate": 6.117055881468175e-07, | |
| "loss": 0.7863, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.807942238267148, | |
| "grad_norm": 0.024112451821565628, | |
| "learning_rate": 6.046750552557139e-07, | |
| "loss": 0.7887, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 1.8090974729241878, | |
| "grad_norm": 0.02391749620437622, | |
| "learning_rate": 5.976796294127286e-07, | |
| "loss": 0.7687, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.8102527075812276, | |
| "grad_norm": 0.023689934983849525, | |
| "learning_rate": 5.907194401768411e-07, | |
| "loss": 0.7705, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 1.8114079422382672, | |
| "grad_norm": 0.025095578283071518, | |
| "learning_rate": 5.837946164544276e-07, | |
| "loss": 0.805, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.8125631768953068, | |
| "grad_norm": 0.02462933026254177, | |
| "learning_rate": 5.769052864968768e-07, | |
| "loss": 0.7699, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 1.8137184115523466, | |
| "grad_norm": 0.023868851363658905, | |
| "learning_rate": 5.700515778982142e-07, | |
| "loss": 0.7506, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.8148736462093864, | |
| "grad_norm": 0.024339932948350906, | |
| "learning_rate": 5.632336175927404e-07, | |
| "loss": 0.7663, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 1.816028880866426, | |
| "grad_norm": 0.023983048275113106, | |
| "learning_rate": 5.564515318526759e-07, | |
| "loss": 0.7555, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.8171841155234656, | |
| "grad_norm": 0.024354344233870506, | |
| "learning_rate": 5.497054462858296e-07, | |
| "loss": 0.7856, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 1.8183393501805054, | |
| "grad_norm": 0.02329828403890133, | |
| "learning_rate": 5.429954858332623e-07, | |
| "loss": 0.7755, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.8194945848375452, | |
| "grad_norm": 0.024324821308255196, | |
| "learning_rate": 5.363217747669843e-07, | |
| "loss": 0.7871, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 1.8206498194945848, | |
| "grad_norm": 0.02310972288250923, | |
| "learning_rate": 5.296844366876441e-07, | |
| "loss": 0.7499, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.8218050541516244, | |
| "grad_norm": 0.02391042560338974, | |
| "learning_rate": 5.230835945222463e-07, | |
| "loss": 0.7941, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 1.8229602888086642, | |
| "grad_norm": 0.024979131296277046, | |
| "learning_rate": 5.165193705218699e-07, | |
| "loss": 0.7719, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.824115523465704, | |
| "grad_norm": 0.024278726428747177, | |
| "learning_rate": 5.099918862594065e-07, | |
| "loss": 0.7815, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 1.8252707581227436, | |
| "grad_norm": 0.024197446182370186, | |
| "learning_rate": 5.035012626273094e-07, | |
| "loss": 0.7538, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.8264259927797832, | |
| "grad_norm": 0.02464877814054489, | |
| "learning_rate": 4.970476198353524e-07, | |
| "loss": 0.764, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 1.827581227436823, | |
| "grad_norm": 0.02464544028043747, | |
| "learning_rate": 4.906310774084055e-07, | |
| "loss": 0.787, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.8287364620938629, | |
| "grad_norm": 0.023831835016608238, | |
| "learning_rate": 4.842517541842186e-07, | |
| "loss": 0.7527, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 1.8298916967509027, | |
| "grad_norm": 0.02423412911593914, | |
| "learning_rate": 4.779097683112254e-07, | |
| "loss": 0.7639, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.8310469314079423, | |
| "grad_norm": 0.023963099345564842, | |
| "learning_rate": 4.7160523724634964e-07, | |
| "loss": 0.7676, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 1.8322021660649819, | |
| "grad_norm": 0.023802466690540314, | |
| "learning_rate": 4.653382777528332e-07, | |
| "loss": 0.7805, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.8333574007220217, | |
| "grad_norm": 0.024113498628139496, | |
| "learning_rate": 4.5910900589807164e-07, | |
| "loss": 0.7578, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 1.8345126353790615, | |
| "grad_norm": 0.024253182113170624, | |
| "learning_rate": 4.5291753705146685e-07, | |
| "loss": 0.7816, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.835667870036101, | |
| "grad_norm": 0.024671630933880806, | |
| "learning_rate": 4.4676398588229044e-07, | |
| "loss": 0.7712, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 1.8368231046931407, | |
| "grad_norm": 0.02365119196474552, | |
| "learning_rate": 4.40648466357554e-07, | |
| "loss": 0.7673, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.8379783393501805, | |
| "grad_norm": 0.024656254798173904, | |
| "learning_rate": 4.3457109173990736e-07, | |
| "loss": 0.7627, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 1.8391335740072203, | |
| "grad_norm": 0.02385905385017395, | |
| "learning_rate": 4.285319745855341e-07, | |
| "loss": 0.7758, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.84028880866426, | |
| "grad_norm": 0.02395222708582878, | |
| "learning_rate": 4.2253122674207165e-07, | |
| "loss": 0.7763, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 1.8414440433212995, | |
| "grad_norm": 0.02432720549404621, | |
| "learning_rate": 4.165689593465352e-07, | |
| "loss": 0.7873, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.8425992779783393, | |
| "grad_norm": 0.02468527853488922, | |
| "learning_rate": 4.1064528282326263e-07, | |
| "loss": 0.7517, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 1.8437545126353792, | |
| "grad_norm": 0.02376159094274044, | |
| "learning_rate": 4.047603068818685e-07, | |
| "loss": 0.7715, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.8449097472924187, | |
| "grad_norm": 0.02394728921353817, | |
| "learning_rate": 3.9891414051521345e-07, | |
| "loss": 0.7697, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 1.8460649819494583, | |
| "grad_norm": 0.024393731728196144, | |
| "learning_rate": 3.93106891997382e-07, | |
| "loss": 0.7651, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.8472202166064982, | |
| "grad_norm": 0.02447488158941269, | |
| "learning_rate": 3.8733866888168026e-07, | |
| "loss": 0.7862, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 1.848375451263538, | |
| "grad_norm": 0.023694733157753944, | |
| "learning_rate": 3.816095779986455e-07, | |
| "loss": 0.7704, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.8495306859205776, | |
| "grad_norm": 0.023735985159873962, | |
| "learning_rate": 3.759197254540624e-07, | |
| "loss": 0.7712, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 1.8506859205776174, | |
| "grad_norm": 0.02489444985985756, | |
| "learning_rate": 3.702692166270027e-07, | |
| "loss": 0.7918, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.851841155234657, | |
| "grad_norm": 0.02421208843588829, | |
| "learning_rate": 3.6465815616787e-07, | |
| "loss": 0.7888, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 1.8529963898916968, | |
| "grad_norm": 0.0240098275244236, | |
| "learning_rate": 3.590866479964664e-07, | |
| "loss": 0.747, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.8541516245487366, | |
| "grad_norm": 0.025053462013602257, | |
| "learning_rate": 3.535547953000618e-07, | |
| "loss": 0.7872, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 1.8553068592057762, | |
| "grad_norm": 0.023561321198940277, | |
| "learning_rate": 3.4806270053148703e-07, | |
| "loss": 0.7748, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.8564620938628158, | |
| "grad_norm": 0.023926684632897377, | |
| "learning_rate": 3.4261046540723424e-07, | |
| "loss": 0.7576, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 1.8576173285198556, | |
| "grad_norm": 0.023767825216054916, | |
| "learning_rate": 3.371981909055759e-07, | |
| "loss": 0.7577, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.8587725631768954, | |
| "grad_norm": 0.0232445877045393, | |
| "learning_rate": 3.318259772646901e-07, | |
| "loss": 0.7442, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 1.859927797833935, | |
| "grad_norm": 0.024164149537682533, | |
| "learning_rate": 3.264939239808092e-07, | |
| "loss": 0.7589, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.8610830324909746, | |
| "grad_norm": 0.023966316133737564, | |
| "learning_rate": 3.2120212980637286e-07, | |
| "loss": 0.768, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 1.8622382671480144, | |
| "grad_norm": 0.023847166448831558, | |
| "learning_rate": 3.1595069274820076e-07, | |
| "loss": 0.7603, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.8633935018050543, | |
| "grad_norm": 0.02418961189687252, | |
| "learning_rate": 3.1073971006567867e-07, | |
| "loss": 0.7814, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 1.8645487364620938, | |
| "grad_norm": 0.023753009736537933, | |
| "learning_rate": 3.055692782689551e-07, | |
| "loss": 0.7667, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.8657039711191334, | |
| "grad_norm": 0.02413536049425602, | |
| "learning_rate": 3.0043949311715463e-07, | |
| "loss": 0.7826, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 1.8668592057761733, | |
| "grad_norm": 0.024381062015891075, | |
| "learning_rate": 2.953504496166049e-07, | |
| "loss": 0.7678, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.868014440433213, | |
| "grad_norm": 0.02448740415275097, | |
| "learning_rate": 2.903022420190769e-07, | |
| "loss": 0.7751, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 1.8691696750902527, | |
| "grad_norm": 0.024086831137537956, | |
| "learning_rate": 2.8529496382003895e-07, | |
| "loss": 0.7799, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.8703249097472923, | |
| "grad_norm": 0.02341514825820923, | |
| "learning_rate": 2.8032870775692436e-07, | |
| "loss": 0.7723, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 1.871480144404332, | |
| "grad_norm": 0.023932676762342453, | |
| "learning_rate": 2.75403565807417e-07, | |
| "loss": 0.7635, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.872635379061372, | |
| "grad_norm": 0.023613903671503067, | |
| "learning_rate": 2.705196291877439e-07, | |
| "loss": 0.7605, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 1.8737906137184117, | |
| "grad_norm": 0.024287506937980652, | |
| "learning_rate": 2.6567698835098765e-07, | |
| "loss": 0.7815, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.8749458483754513, | |
| "grad_norm": 0.023454533889889717, | |
| "learning_rate": 2.608757329854116e-07, | |
| "loss": 0.759, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 1.876101083032491, | |
| "grad_norm": 0.024026375263929367, | |
| "learning_rate": 2.561159520127993e-07, | |
| "loss": 0.7847, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.8772563176895307, | |
| "grad_norm": 0.024074561893939972, | |
| "learning_rate": 2.5139773358680497e-07, | |
| "loss": 0.7734, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 1.8784115523465705, | |
| "grad_norm": 0.024127675220370293, | |
| "learning_rate": 2.467211650913235e-07, | |
| "loss": 0.7627, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.8795667870036101, | |
| "grad_norm": 0.024527592584490776, | |
| "learning_rate": 2.4208633313887026e-07, | |
| "loss": 0.8107, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 1.8807220216606497, | |
| "grad_norm": 0.02324732393026352, | |
| "learning_rate": 2.3749332356897935e-07, | |
| "loss": 0.7739, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.8818772563176895, | |
| "grad_norm": 0.02475915476679802, | |
| "learning_rate": 2.3294222144661086e-07, | |
| "loss": 0.778, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 1.8830324909747294, | |
| "grad_norm": 0.024105004966259003, | |
| "learning_rate": 2.2843311106057757e-07, | |
| "loss": 0.7772, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.884187725631769, | |
| "grad_norm": 0.02386721596121788, | |
| "learning_rate": 2.2396607592198304e-07, | |
| "loss": 0.7631, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 1.8853429602888085, | |
| "grad_norm": 0.024259351193904877, | |
| "learning_rate": 2.1954119876267445e-07, | |
| "loss": 0.7966, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.8864981949458484, | |
| "grad_norm": 0.023345720022916794, | |
| "learning_rate": 2.1515856153371186e-07, | |
| "loss": 0.7685, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 1.8876534296028882, | |
| "grad_norm": 0.024387696757912636, | |
| "learning_rate": 2.108182454038492e-07, | |
| "loss": 0.7807, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.8888086642599278, | |
| "grad_norm": 0.024348227307200432, | |
| "learning_rate": 2.065203307580309e-07, | |
| "loss": 0.7761, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 1.8899638989169674, | |
| "grad_norm": 0.023634381592273712, | |
| "learning_rate": 2.0226489719590362e-07, | |
| "loss": 0.7619, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.8911191335740072, | |
| "grad_norm": 0.023854384198784828, | |
| "learning_rate": 1.9805202353034296e-07, | |
| "loss": 0.7587, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 1.892274368231047, | |
| "grad_norm": 0.023444773629307747, | |
| "learning_rate": 1.9388178778599145e-07, | |
| "loss": 0.7739, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.8934296028880866, | |
| "grad_norm": 0.023775937035679817, | |
| "learning_rate": 1.8975426719781569e-07, | |
| "loss": 0.7671, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 1.8945848375451262, | |
| "grad_norm": 0.023820441216230392, | |
| "learning_rate": 1.85669538209674e-07, | |
| "loss": 0.7873, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.895740072202166, | |
| "grad_norm": 0.023935571312904358, | |
| "learning_rate": 1.816276764729035e-07, | |
| "loss": 0.7698, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 1.8968953068592058, | |
| "grad_norm": 0.02413167431950569, | |
| "learning_rate": 1.7762875684491536e-07, | |
| "loss": 0.7851, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.8980505415162456, | |
| "grad_norm": 0.02435237169265747, | |
| "learning_rate": 1.7367285338781114e-07, | |
| "loss": 0.7699, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 1.8992057761732852, | |
| "grad_norm": 0.024776320904493332, | |
| "learning_rate": 1.6976003936701042e-07, | |
| "loss": 0.7768, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.9003610108303248, | |
| "grad_norm": 0.02364964410662651, | |
| "learning_rate": 1.6589038724989278e-07, | |
| "loss": 0.7931, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 1.9015162454873646, | |
| "grad_norm": 0.023821519687771797, | |
| "learning_rate": 1.6206396870445796e-07, | |
| "loss": 0.7533, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.9026714801444045, | |
| "grad_norm": 0.02380574308335781, | |
| "learning_rate": 1.582808545979954e-07, | |
| "loss": 0.7875, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 1.903826714801444, | |
| "grad_norm": 0.024249248206615448, | |
| "learning_rate": 1.5454111499577497e-07, | |
| "loss": 0.7561, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.9049819494584836, | |
| "grad_norm": 0.02454947866499424, | |
| "learning_rate": 1.5084481915974666e-07, | |
| "loss": 0.7695, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 1.9061371841155235, | |
| "grad_norm": 0.023830397054553032, | |
| "learning_rate": 1.471920355472605e-07, | |
| "loss": 0.7765, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.9072924187725633, | |
| "grad_norm": 0.023539626970887184, | |
| "learning_rate": 1.4358283180979607e-07, | |
| "loss": 0.8193, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 1.9084476534296029, | |
| "grad_norm": 0.02411411888897419, | |
| "learning_rate": 1.4001727479171022e-07, | |
| "loss": 0.8001, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.9096028880866425, | |
| "grad_norm": 0.024499407038092613, | |
| "learning_rate": 1.3649543052900035e-07, | |
| "loss": 0.7714, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 1.9107581227436823, | |
| "grad_norm": 0.023902500048279762, | |
| "learning_rate": 1.3301736424808105e-07, | |
| "loss": 0.7906, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.911913357400722, | |
| "grad_norm": 0.024184612557291985, | |
| "learning_rate": 1.295831403645745e-07, | |
| "loss": 0.7643, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 1.9130685920577617, | |
| "grad_norm": 0.024717319756746292, | |
| "learning_rate": 1.2619282248211896e-07, | |
| "loss": 0.8194, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.9142238267148013, | |
| "grad_norm": 0.023884067311882973, | |
| "learning_rate": 1.2284647339118997e-07, | |
| "loss": 0.753, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 1.915379061371841, | |
| "grad_norm": 0.023947538807988167, | |
| "learning_rate": 1.1954415506793914e-07, | |
| "loss": 0.7659, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.916534296028881, | |
| "grad_norm": 0.02345140650868416, | |
| "learning_rate": 1.1628592867304393e-07, | |
| "loss": 0.7673, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 1.9176895306859207, | |
| "grad_norm": 0.02360740303993225, | |
| "learning_rate": 1.1307185455057616e-07, | |
| "loss": 0.7703, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.9188447653429603, | |
| "grad_norm": 0.023865753784775734, | |
| "learning_rate": 1.0990199222688366e-07, | |
| "loss": 0.7839, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.02310926280915737, | |
| "learning_rate": 1.0677640040949089e-07, | |
| "loss": 0.7723, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.9211552346570397, | |
| "grad_norm": 0.023065300658345222, | |
| "learning_rate": 1.0369513698600607e-07, | |
| "loss": 0.7699, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 1.9223104693140796, | |
| "grad_norm": 0.023747362196445465, | |
| "learning_rate": 1.0065825902305331e-07, | |
| "loss": 0.7736, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.9234657039711192, | |
| "grad_norm": 0.02400401420891285, | |
| "learning_rate": 9.766582276521622e-08, | |
| "loss": 0.7661, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 1.9246209386281588, | |
| "grad_norm": 0.02401837147772312, | |
| "learning_rate": 9.471788363399199e-08, | |
| "loss": 0.7723, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.9257761732851986, | |
| "grad_norm": 0.024011146277189255, | |
| "learning_rate": 9.181449622676956e-08, | |
| "loss": 0.7702, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 1.9269314079422384, | |
| "grad_norm": 0.023694701492786407, | |
| "learning_rate": 8.8955714315814e-08, | |
| "loss": 0.7538, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.928086642599278, | |
| "grad_norm": 0.02426433376967907, | |
| "learning_rate": 8.614159084727651e-08, | |
| "loss": 0.7792, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 1.9292418772563176, | |
| "grad_norm": 0.024202091619372368, | |
| "learning_rate": 8.337217794020758e-08, | |
| "loss": 0.7423, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.9303971119133574, | |
| "grad_norm": 0.024061063304543495, | |
| "learning_rate": 8.064752688559553e-08, | |
| "loss": 0.7549, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 1.9315523465703972, | |
| "grad_norm": 0.024105606600642204, | |
| "learning_rate": 7.796768814541562e-08, | |
| "loss": 0.7845, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.9327075812274368, | |
| "grad_norm": 0.024117371067404747, | |
| "learning_rate": 7.533271135169531e-08, | |
| "loss": 0.7706, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 1.9338628158844764, | |
| "grad_norm": 0.023541245609521866, | |
| "learning_rate": 7.274264530559488e-08, | |
| "loss": 0.7794, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.9350180505415162, | |
| "grad_norm": 0.023918094113469124, | |
| "learning_rate": 7.019753797650377e-08, | |
| "loss": 0.7515, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 1.936173285198556, | |
| "grad_norm": 0.02431383728981018, | |
| "learning_rate": 6.769743650115355e-08, | |
| "loss": 0.7852, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.9373285198555956, | |
| "grad_norm": 0.022908180952072144, | |
| "learning_rate": 6.524238718274186e-08, | |
| "loss": 0.7451, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 1.9384837545126352, | |
| "grad_norm": 0.023673677816987038, | |
| "learning_rate": 6.283243549007756e-08, | |
| "loss": 0.7679, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.939638989169675, | |
| "grad_norm": 0.02336922660470009, | |
| "learning_rate": 6.046762605673734e-08, | |
| "loss": 0.7619, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 1.9407942238267148, | |
| "grad_norm": 0.023723188787698746, | |
| "learning_rate": 5.814800268024005e-08, | |
| "loss": 0.7792, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.9419494584837547, | |
| "grad_norm": 0.02384623885154724, | |
| "learning_rate": 5.58736083212344e-08, | |
| "loss": 0.7648, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 1.9431046931407943, | |
| "grad_norm": 0.024481408298015594, | |
| "learning_rate": 5.364448510270387e-08, | |
| "loss": 0.7907, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.9442599277978339, | |
| "grad_norm": 0.023579951375722885, | |
| "learning_rate": 5.146067430918623e-08, | |
| "loss": 0.7737, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 1.9454151624548737, | |
| "grad_norm": 0.02421787939965725, | |
| "learning_rate": 4.932221638601039e-08, | |
| "loss": 0.7746, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.9465703971119135, | |
| "grad_norm": 0.02382575534284115, | |
| "learning_rate": 4.722915093854457e-08, | |
| "loss": 0.7547, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 1.947725631768953, | |
| "grad_norm": 0.024289904162287712, | |
| "learning_rate": 4.518151673146557e-08, | |
| "loss": 0.7786, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.9488808664259927, | |
| "grad_norm": 0.0236971415579319, | |
| "learning_rate": 4.3179351688039386e-08, | |
| "loss": 0.7785, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 1.9500361010830325, | |
| "grad_norm": 0.023959195241332054, | |
| "learning_rate": 4.122269288941911e-08, | |
| "loss": 0.7665, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.9511913357400723, | |
| "grad_norm": 0.02382025495171547, | |
| "learning_rate": 3.931157657395747e-08, | |
| "loss": 0.7539, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 1.952346570397112, | |
| "grad_norm": 0.024375753477215767, | |
| "learning_rate": 3.744603813653741e-08, | |
| "loss": 0.7962, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.9535018050541515, | |
| "grad_norm": 0.023500755429267883, | |
| "learning_rate": 3.562611212791524e-08, | |
| "loss": 0.7373, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 1.9546570397111913, | |
| "grad_norm": 0.024235745891928673, | |
| "learning_rate": 3.3851832254080314e-08, | |
| "loss": 0.7971, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.9558122743682311, | |
| "grad_norm": 0.02372434362769127, | |
| "learning_rate": 3.212323137563259e-08, | |
| "loss": 0.746, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 1.9569675090252707, | |
| "grad_norm": 0.02388044260442257, | |
| "learning_rate": 3.044034150717202e-08, | |
| "loss": 0.7903, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.9581227436823103, | |
| "grad_norm": 0.02452683448791504, | |
| "learning_rate": 2.880319381670718e-08, | |
| "loss": 0.7752, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 1.9592779783393501, | |
| "grad_norm": 0.023390717804431915, | |
| "learning_rate": 2.721181862507687e-08, | |
| "loss": 0.7453, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.96043321299639, | |
| "grad_norm": 0.023405345156788826, | |
| "learning_rate": 2.566624540538925e-08, | |
| "loss": 0.7601, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 1.9615884476534298, | |
| "grad_norm": 0.023698341101408005, | |
| "learning_rate": 2.4166502782475928e-08, | |
| "loss": 0.7577, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.9627436823104694, | |
| "grad_norm": 0.024021916091442108, | |
| "learning_rate": 2.2712618532361902e-08, | |
| "loss": 0.7723, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 1.963898916967509, | |
| "grad_norm": 0.024296529591083527, | |
| "learning_rate": 2.130461958174988e-08, | |
| "loss": 0.7896, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.9650541516245488, | |
| "grad_norm": 0.02386784926056862, | |
| "learning_rate": 1.9942532007523544e-08, | |
| "loss": 0.7587, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 1.9662093862815886, | |
| "grad_norm": 0.023428186774253845, | |
| "learning_rate": 1.8626381036264058e-08, | |
| "loss": 0.7714, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.9673646209386282, | |
| "grad_norm": 0.024159930646419525, | |
| "learning_rate": 1.735619104378108e-08, | |
| "loss": 0.7937, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 1.9685198555956678, | |
| "grad_norm": 0.024223441258072853, | |
| "learning_rate": 1.6131985554663975e-08, | |
| "loss": 0.761, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.9696750902527076, | |
| "grad_norm": 0.02402389794588089, | |
| "learning_rate": 1.4953787241843607e-08, | |
| "loss": 0.7742, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 1.9708303249097474, | |
| "grad_norm": 0.023779474198818207, | |
| "learning_rate": 1.382161792617522e-08, | |
| "loss": 0.7489, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.971985559566787, | |
| "grad_norm": 0.02408597618341446, | |
| "learning_rate": 1.273549857603129e-08, | |
| "loss": 0.7927, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 1.9731407942238266, | |
| "grad_norm": 0.023695794865489006, | |
| "learning_rate": 1.16954493069155e-08, | |
| "loss": 0.7603, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.9742960288808664, | |
| "grad_norm": 0.025053711608052254, | |
| "learning_rate": 1.0701489381089175e-08, | |
| "loss": 0.7886, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 1.9754512635379062, | |
| "grad_norm": 0.023919204249978065, | |
| "learning_rate": 9.753637207214111e-09, | |
| "loss": 0.7837, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.9766064981949458, | |
| "grad_norm": 0.023150555789470673, | |
| "learning_rate": 8.85191034001343e-09, | |
| "loss": 0.7449, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 1.9777617328519854, | |
| "grad_norm": 0.023474812507629395, | |
| "learning_rate": 7.996325479943542e-09, | |
| "loss": 0.7825, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.9789169675090252, | |
| "grad_norm": 0.02393323741853237, | |
| "learning_rate": 7.186898472888319e-09, | |
| "loss": 0.7702, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 1.980072202166065, | |
| "grad_norm": 0.02350715547800064, | |
| "learning_rate": 6.423644309862407e-09, | |
| "loss": 0.7662, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.9812274368231046, | |
| "grad_norm": 0.023993050679564476, | |
| "learning_rate": 5.706577126735919e-09, | |
| "loss": 0.794, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 1.9823826714801442, | |
| "grad_norm": 0.024228619411587715, | |
| "learning_rate": 5.035710203971343e-09, | |
| "loss": 0.7874, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.983537906137184, | |
| "grad_norm": 0.024353953078389168, | |
| "learning_rate": 4.411055966378209e-09, | |
| "loss": 0.7666, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 1.9846931407942239, | |
| "grad_norm": 0.023848267272114754, | |
| "learning_rate": 3.832625982882452e-09, | |
| "loss": 0.7824, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.9858483754512637, | |
| "grad_norm": 0.02303747460246086, | |
| "learning_rate": 3.300430966312174e-09, | |
| "loss": 0.7451, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 1.9870036101083033, | |
| "grad_norm": 0.02363884262740612, | |
| "learning_rate": 2.8144807732005904e-09, | |
| "loss": 0.7673, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.9881588447653429, | |
| "grad_norm": 0.024231605231761932, | |
| "learning_rate": 2.374784403601199e-09, | |
| "loss": 0.7823, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 1.9893140794223827, | |
| "grad_norm": 0.02352130599319935, | |
| "learning_rate": 1.9813500009229292e-09, | |
| "loss": 0.7676, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.9904693140794225, | |
| "grad_norm": 0.02459130994975567, | |
| "learning_rate": 1.634184851778611e-09, | |
| "loss": 0.7937, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 1.991624548736462, | |
| "grad_norm": 0.023892242461442947, | |
| "learning_rate": 1.3332953858495395e-09, | |
| "loss": 0.7666, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.9927797833935017, | |
| "grad_norm": 0.0234362855553627, | |
| "learning_rate": 1.0786871757678059e-09, | |
| "loss": 0.7796, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 1.9939350180505415, | |
| "grad_norm": 0.024267012253403664, | |
| "learning_rate": 8.703649370116684e-10, | |
| "loss": 0.738, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.9950902527075813, | |
| "grad_norm": 0.023687295615673065, | |
| "learning_rate": 7.083325278189627e-10, | |
| "loss": 0.7633, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 1.996245487364621, | |
| "grad_norm": 0.02364824153482914, | |
| "learning_rate": 5.925929491152231e-10, | |
| "loss": 0.769, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.9974007220216605, | |
| "grad_norm": 0.024108268320560455, | |
| "learning_rate": 5.231483444587321e-10, | |
| "loss": 0.7676, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 1.9985559566787003, | |
| "grad_norm": 0.025233233347535133, | |
| "learning_rate": 5e-10, | |
| "loss": 0.7958, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.9985559566787003, | |
| "step": 1730, | |
| "total_flos": 2.356928687040889e+19, | |
| "train_loss": 0.8199663722446199, | |
| "train_runtime": 64534.4414, | |
| "train_samples_per_second": 10.301, | |
| "train_steps_per_second": 0.027 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1730, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.356928687040889e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |