Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use guangyangnlp/Qwen3-4B-SFT-science-2e-5 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use guangyangnlp/Qwen3-4B-SFT-science-2e-5 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="guangyangnlp/Qwen3-4B-SFT-science-2e-5")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM

tokenizer = AutoTokenizer.from_pretrained("guangyangnlp/Qwen3-4B-SFT-science-2e-5")
model = AutoModelForMultimodalLM.from_pretrained("guangyangnlp/Qwen3-4B-SFT-science-2e-5")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use guangyangnlp/Qwen3-4B-SFT-science-2e-5 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "guangyangnlp/Qwen3-4B-SFT-science-2e-5"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "guangyangnlp/Qwen3-4B-SFT-science-2e-5",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/guangyangnlp/Qwen3-4B-SFT-science-2e-5
- SGLang
How to use guangyangnlp/Qwen3-4B-SFT-science-2e-5 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "guangyangnlp/Qwen3-4B-SFT-science-2e-5" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "guangyangnlp/Qwen3-4B-SFT-science-2e-5",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "guangyangnlp/Qwen3-4B-SFT-science-2e-5" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "guangyangnlp/Qwen3-4B-SFT-science-2e-5",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use guangyangnlp/Qwen3-4B-SFT-science-2e-5 with Docker Model Runner:
docker model run hf.co/guangyangnlp/Qwen3-4B-SFT-science-2e-5
| { | |
| "best_global_step": 1380, | |
| "best_metric": 0.6770720481872559, | |
| "best_model_checkpoint": "saves/qwen3-4B/Qwen3-4B-SFT-science-2e-5/checkpoint-1380", | |
| "epoch": 3.0, | |
| "eval_steps": 230, | |
| "global_step": 2313, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0012976480129764801, | |
| "grad_norm": 8.15907096862793, | |
| "learning_rate": 0.0, | |
| "loss": 1.117659091949463, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0025952960259529602, | |
| "grad_norm": 7.67869234085083, | |
| "learning_rate": 1.7241379310344828e-07, | |
| "loss": 1.0263863801956177, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0038929440389294406, | |
| "grad_norm": 8.24106502532959, | |
| "learning_rate": 3.4482758620689656e-07, | |
| "loss": 1.1220319271087646, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0051905920519059205, | |
| "grad_norm": 8.60258960723877, | |
| "learning_rate": 5.172413793103449e-07, | |
| "loss": 1.1806347370147705, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.006488240064882401, | |
| "grad_norm": 7.782258033752441, | |
| "learning_rate": 6.896551724137931e-07, | |
| "loss": 1.105953574180603, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007785888077858881, | |
| "grad_norm": 7.797566890716553, | |
| "learning_rate": 8.620689655172415e-07, | |
| "loss": 1.0968478918075562, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009083536090835361, | |
| "grad_norm": 7.626895427703857, | |
| "learning_rate": 1.0344827586206898e-06, | |
| "loss": 1.0549066066741943, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.010381184103811841, | |
| "grad_norm": 7.147245407104492, | |
| "learning_rate": 1.2068965517241381e-06, | |
| "loss": 1.0259548425674438, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01167883211678832, | |
| "grad_norm": 5.977053165435791, | |
| "learning_rate": 1.3793103448275862e-06, | |
| "loss": 0.954434335231781, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.012976480129764802, | |
| "grad_norm": 6.206176280975342, | |
| "learning_rate": 1.5517241379310346e-06, | |
| "loss": 1.049869418144226, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014274128142741281, | |
| "grad_norm": 5.300525665283203, | |
| "learning_rate": 1.724137931034483e-06, | |
| "loss": 1.0076310634613037, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.015571776155717762, | |
| "grad_norm": 4.235332489013672, | |
| "learning_rate": 1.896551724137931e-06, | |
| "loss": 0.9547766447067261, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01686942416869424, | |
| "grad_norm": 4.258054733276367, | |
| "learning_rate": 2.0689655172413796e-06, | |
| "loss": 0.9808558225631714, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.018167072181670723, | |
| "grad_norm": 3.9000754356384277, | |
| "learning_rate": 2.241379310344828e-06, | |
| "loss": 0.955378532409668, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019464720194647202, | |
| "grad_norm": 2.9283816814422607, | |
| "learning_rate": 2.4137931034482762e-06, | |
| "loss": 0.9264786839485168, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.020762368207623682, | |
| "grad_norm": 2.1859076023101807, | |
| "learning_rate": 2.5862068965517246e-06, | |
| "loss": 0.8895066380500793, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02206001622060016, | |
| "grad_norm": 2.1717398166656494, | |
| "learning_rate": 2.7586206896551725e-06, | |
| "loss": 0.9194827675819397, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02335766423357664, | |
| "grad_norm": 1.7686649560928345, | |
| "learning_rate": 2.931034482758621e-06, | |
| "loss": 0.8839207291603088, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.024655312246553124, | |
| "grad_norm": 1.7060308456420898, | |
| "learning_rate": 3.103448275862069e-06, | |
| "loss": 0.8821989297866821, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.025952960259529603, | |
| "grad_norm": 1.4888310432434082, | |
| "learning_rate": 3.2758620689655175e-06, | |
| "loss": 0.7937015295028687, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.027250608272506083, | |
| "grad_norm": 1.5812122821807861, | |
| "learning_rate": 3.448275862068966e-06, | |
| "loss": 0.9222494959831238, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.028548256285482562, | |
| "grad_norm": 1.5842291116714478, | |
| "learning_rate": 3.620689655172414e-06, | |
| "loss": 0.8129012584686279, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02984590429845904, | |
| "grad_norm": 1.5270442962646484, | |
| "learning_rate": 3.793103448275862e-06, | |
| "loss": 0.843705415725708, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.031143552311435525, | |
| "grad_norm": 1.1963210105895996, | |
| "learning_rate": 3.96551724137931e-06, | |
| "loss": 0.7932494878768921, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.032441200324412, | |
| "grad_norm": 1.0309710502624512, | |
| "learning_rate": 4.137931034482759e-06, | |
| "loss": 0.7899153828620911, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03373884833738848, | |
| "grad_norm": 0.9451068639755249, | |
| "learning_rate": 4.310344827586207e-06, | |
| "loss": 0.8323757648468018, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.035036496350364967, | |
| "grad_norm": 0.9398018717765808, | |
| "learning_rate": 4.482758620689656e-06, | |
| "loss": 0.8048505187034607, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.036334144363341446, | |
| "grad_norm": 0.8759371042251587, | |
| "learning_rate": 4.655172413793104e-06, | |
| "loss": 0.8321108222007751, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.037631792376317925, | |
| "grad_norm": 0.7862148284912109, | |
| "learning_rate": 4.8275862068965525e-06, | |
| "loss": 0.8356962203979492, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.038929440389294405, | |
| "grad_norm": 0.8221083283424377, | |
| "learning_rate": 5e-06, | |
| "loss": 0.856194794178009, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.040227088402270884, | |
| "grad_norm": 0.7913339734077454, | |
| "learning_rate": 5.172413793103449e-06, | |
| "loss": 0.782647967338562, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.041524736415247364, | |
| "grad_norm": 0.7948570847511292, | |
| "learning_rate": 5.344827586206896e-06, | |
| "loss": 0.8002289533615112, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04282238442822384, | |
| "grad_norm": 0.8172705769538879, | |
| "learning_rate": 5.517241379310345e-06, | |
| "loss": 0.8037389516830444, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04412003244120032, | |
| "grad_norm": 0.7674341797828674, | |
| "learning_rate": 5.689655172413794e-06, | |
| "loss": 0.7561640739440918, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0454176804541768, | |
| "grad_norm": 0.7508828043937683, | |
| "learning_rate": 5.862068965517242e-06, | |
| "loss": 0.820884108543396, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04671532846715328, | |
| "grad_norm": 0.7388272285461426, | |
| "learning_rate": 6.03448275862069e-06, | |
| "loss": 0.8406673669815063, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04801297648012977, | |
| "grad_norm": 0.6549146771430969, | |
| "learning_rate": 6.206896551724138e-06, | |
| "loss": 0.7618731260299683, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.04931062449310625, | |
| "grad_norm": 0.6996558904647827, | |
| "learning_rate": 6.379310344827587e-06, | |
| "loss": 0.7531220316886902, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.05060827250608273, | |
| "grad_norm": 0.659206748008728, | |
| "learning_rate": 6.551724137931035e-06, | |
| "loss": 0.8432419896125793, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.05190592051905921, | |
| "grad_norm": 0.6969435811042786, | |
| "learning_rate": 6.724137931034484e-06, | |
| "loss": 0.8152772784233093, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.053203568532035686, | |
| "grad_norm": 0.638674795627594, | |
| "learning_rate": 6.896551724137932e-06, | |
| "loss": 0.8012467622756958, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.054501216545012166, | |
| "grad_norm": 0.6248321533203125, | |
| "learning_rate": 7.0689655172413796e-06, | |
| "loss": 0.7576991319656372, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.055798864557988645, | |
| "grad_norm": 0.6499493718147278, | |
| "learning_rate": 7.241379310344828e-06, | |
| "loss": 0.7685450911521912, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.057096512570965124, | |
| "grad_norm": 0.6266531348228455, | |
| "learning_rate": 7.413793103448277e-06, | |
| "loss": 0.7682685852050781, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.058394160583941604, | |
| "grad_norm": 0.6328745484352112, | |
| "learning_rate": 7.586206896551724e-06, | |
| "loss": 0.8221952319145203, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05969180859691808, | |
| "grad_norm": 0.6457077860832214, | |
| "learning_rate": 7.758620689655173e-06, | |
| "loss": 0.7616772055625916, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06098945660989456, | |
| "grad_norm": 0.6841326951980591, | |
| "learning_rate": 7.93103448275862e-06, | |
| "loss": 0.7185612916946411, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.06228710462287105, | |
| "grad_norm": 0.653884768486023, | |
| "learning_rate": 8.103448275862069e-06, | |
| "loss": 0.8144221901893616, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06358475263584752, | |
| "grad_norm": 0.6235163807868958, | |
| "learning_rate": 8.275862068965518e-06, | |
| "loss": 0.7789400815963745, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.064882400648824, | |
| "grad_norm": 0.6035148501396179, | |
| "learning_rate": 8.448275862068966e-06, | |
| "loss": 0.7788746356964111, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06618004866180048, | |
| "grad_norm": 0.6197084784507751, | |
| "learning_rate": 8.620689655172414e-06, | |
| "loss": 0.7773774266242981, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.06747769667477696, | |
| "grad_norm": 0.6356611847877502, | |
| "learning_rate": 8.793103448275862e-06, | |
| "loss": 0.8119993209838867, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06877534468775345, | |
| "grad_norm": 0.6229863166809082, | |
| "learning_rate": 8.965517241379312e-06, | |
| "loss": 0.8156378269195557, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.07007299270072993, | |
| "grad_norm": 0.6285703778266907, | |
| "learning_rate": 9.13793103448276e-06, | |
| "loss": 0.7589212656021118, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.07137064071370641, | |
| "grad_norm": 0.6221722960472107, | |
| "learning_rate": 9.310344827586207e-06, | |
| "loss": 0.7588199377059937, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.07266828872668289, | |
| "grad_norm": 0.5896920561790466, | |
| "learning_rate": 9.482758620689655e-06, | |
| "loss": 0.7869905233383179, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07396593673965937, | |
| "grad_norm": 0.6120532155036926, | |
| "learning_rate": 9.655172413793105e-06, | |
| "loss": 0.7379593849182129, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.07526358475263585, | |
| "grad_norm": 0.6437456011772156, | |
| "learning_rate": 9.827586206896553e-06, | |
| "loss": 0.8263105154037476, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07656123276561233, | |
| "grad_norm": 0.6005666851997375, | |
| "learning_rate": 1e-05, | |
| "loss": 0.8053442239761353, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.07785888077858881, | |
| "grad_norm": 0.618229866027832, | |
| "learning_rate": 1.0172413793103449e-05, | |
| "loss": 0.7303550243377686, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07915652879156529, | |
| "grad_norm": 0.6245790719985962, | |
| "learning_rate": 1.0344827586206898e-05, | |
| "loss": 0.7618341445922852, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.08045417680454177, | |
| "grad_norm": 0.632989227771759, | |
| "learning_rate": 1.0517241379310346e-05, | |
| "loss": 0.8073338270187378, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.08175182481751825, | |
| "grad_norm": 0.6083235740661621, | |
| "learning_rate": 1.0689655172413792e-05, | |
| "loss": 0.7776636481285095, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.08304947283049473, | |
| "grad_norm": 0.6136429309844971, | |
| "learning_rate": 1.0862068965517242e-05, | |
| "loss": 0.8043953776359558, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08434712084347121, | |
| "grad_norm": 0.6103477478027344, | |
| "learning_rate": 1.103448275862069e-05, | |
| "loss": 0.7928889989852905, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.08564476885644769, | |
| "grad_norm": 0.6038222312927246, | |
| "learning_rate": 1.1206896551724138e-05, | |
| "loss": 0.7927621603012085, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08694241686942417, | |
| "grad_norm": 0.6238990426063538, | |
| "learning_rate": 1.1379310344827587e-05, | |
| "loss": 0.7877966165542603, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.08824006488240065, | |
| "grad_norm": 0.5899522304534912, | |
| "learning_rate": 1.1551724137931035e-05, | |
| "loss": 0.721104621887207, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08953771289537713, | |
| "grad_norm": 0.6330446004867554, | |
| "learning_rate": 1.1724137931034483e-05, | |
| "loss": 0.8130797147750854, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.0908353609083536, | |
| "grad_norm": 0.6214055418968201, | |
| "learning_rate": 1.1896551724137933e-05, | |
| "loss": 0.78719162940979, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.09213300892133008, | |
| "grad_norm": 0.648266077041626, | |
| "learning_rate": 1.206896551724138e-05, | |
| "loss": 0.7923158407211304, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.09343065693430656, | |
| "grad_norm": 0.6473869681358337, | |
| "learning_rate": 1.2241379310344827e-05, | |
| "loss": 0.8679413795471191, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09472830494728304, | |
| "grad_norm": 0.5954247117042542, | |
| "learning_rate": 1.2413793103448277e-05, | |
| "loss": 0.7424967288970947, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.09602595296025954, | |
| "grad_norm": 0.6318120956420898, | |
| "learning_rate": 1.2586206896551725e-05, | |
| "loss": 0.7612457275390625, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.09732360097323602, | |
| "grad_norm": 0.6183631420135498, | |
| "learning_rate": 1.2758620689655174e-05, | |
| "loss": 0.7567603588104248, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0986212489862125, | |
| "grad_norm": 0.6186433434486389, | |
| "learning_rate": 1.2931034482758622e-05, | |
| "loss": 0.8088338375091553, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09991889699918897, | |
| "grad_norm": 0.6034461855888367, | |
| "learning_rate": 1.310344827586207e-05, | |
| "loss": 0.7736937999725342, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.10121654501216545, | |
| "grad_norm": 0.6197369694709778, | |
| "learning_rate": 1.327586206896552e-05, | |
| "loss": 0.7498612999916077, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.10251419302514193, | |
| "grad_norm": 0.6505046486854553, | |
| "learning_rate": 1.3448275862068967e-05, | |
| "loss": 0.8144986629486084, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.10381184103811841, | |
| "grad_norm": 0.6240726113319397, | |
| "learning_rate": 1.3620689655172414e-05, | |
| "loss": 0.7407926321029663, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10510948905109489, | |
| "grad_norm": 0.6124047040939331, | |
| "learning_rate": 1.3793103448275863e-05, | |
| "loss": 0.7526525855064392, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.10640713706407137, | |
| "grad_norm": 0.5982939004898071, | |
| "learning_rate": 1.3965517241379311e-05, | |
| "loss": 0.722671627998352, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10770478507704785, | |
| "grad_norm": 0.5908958315849304, | |
| "learning_rate": 1.4137931034482759e-05, | |
| "loss": 0.7402417659759521, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.10900243309002433, | |
| "grad_norm": 0.6116979718208313, | |
| "learning_rate": 1.4310344827586209e-05, | |
| "loss": 0.7960222959518433, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.11030008110300081, | |
| "grad_norm": 0.6197500228881836, | |
| "learning_rate": 1.4482758620689657e-05, | |
| "loss": 0.7519891858100891, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.11159772911597729, | |
| "grad_norm": 2.220649480819702, | |
| "learning_rate": 1.4655172413793105e-05, | |
| "loss": 0.7659766674041748, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.11289537712895377, | |
| "grad_norm": 5.19334602355957, | |
| "learning_rate": 1.4827586206896554e-05, | |
| "loss": 0.7760565280914307, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.11419302514193025, | |
| "grad_norm": 0.6664707064628601, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.7354503870010376, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.11549067315490673, | |
| "grad_norm": 0.6490852236747742, | |
| "learning_rate": 1.5172413793103448e-05, | |
| "loss": 0.7803969979286194, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.11678832116788321, | |
| "grad_norm": 0.6153193116188049, | |
| "learning_rate": 1.5344827586206898e-05, | |
| "loss": 0.7803000807762146, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11808596918085969, | |
| "grad_norm": 0.6364138722419739, | |
| "learning_rate": 1.5517241379310346e-05, | |
| "loss": 0.7799690961837769, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.11938361719383617, | |
| "grad_norm": 0.6558602452278137, | |
| "learning_rate": 1.5689655172413794e-05, | |
| "loss": 0.8238034248352051, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.12068126520681265, | |
| "grad_norm": 0.629127562046051, | |
| "learning_rate": 1.586206896551724e-05, | |
| "loss": 0.7694847583770752, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.12197891321978913, | |
| "grad_norm": 0.5806317925453186, | |
| "learning_rate": 1.603448275862069e-05, | |
| "loss": 0.7090768814086914, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.12327656123276562, | |
| "grad_norm": 0.673556387424469, | |
| "learning_rate": 1.6206896551724137e-05, | |
| "loss": 0.8536560535430908, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1245742092457421, | |
| "grad_norm": 0.5968764424324036, | |
| "learning_rate": 1.637931034482759e-05, | |
| "loss": 0.7300469875335693, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.12587185725871858, | |
| "grad_norm": 0.6305297613143921, | |
| "learning_rate": 1.6551724137931037e-05, | |
| "loss": 0.7591036558151245, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.12716950527169504, | |
| "grad_norm": 0.606986403465271, | |
| "learning_rate": 1.6724137931034485e-05, | |
| "loss": 0.76216721534729, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.12846715328467154, | |
| "grad_norm": 0.6063655018806458, | |
| "learning_rate": 1.6896551724137932e-05, | |
| "loss": 0.68424391746521, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.129764801297648, | |
| "grad_norm": 0.7023365497589111, | |
| "learning_rate": 1.706896551724138e-05, | |
| "loss": 0.8325944542884827, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1310624493106245, | |
| "grad_norm": 0.6358933448791504, | |
| "learning_rate": 1.7241379310344828e-05, | |
| "loss": 0.8054566383361816, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.13236009732360096, | |
| "grad_norm": 0.6431549191474915, | |
| "learning_rate": 1.7413793103448276e-05, | |
| "loss": 0.7429993748664856, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.13365774533657745, | |
| "grad_norm": 0.6152120232582092, | |
| "learning_rate": 1.7586206896551724e-05, | |
| "loss": 0.7206076383590698, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.13495539334955392, | |
| "grad_norm": 0.6442373991012573, | |
| "learning_rate": 1.7758620689655175e-05, | |
| "loss": 0.806060791015625, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.1362530413625304, | |
| "grad_norm": 0.6756954789161682, | |
| "learning_rate": 1.7931034482758623e-05, | |
| "loss": 0.8363012671470642, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1375506893755069, | |
| "grad_norm": 0.743787407875061, | |
| "learning_rate": 1.810344827586207e-05, | |
| "loss": 0.8207604885101318, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13884833738848337, | |
| "grad_norm": 0.686335563659668, | |
| "learning_rate": 1.827586206896552e-05, | |
| "loss": 0.7393860816955566, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.14014598540145987, | |
| "grad_norm": 0.6191396713256836, | |
| "learning_rate": 1.8448275862068967e-05, | |
| "loss": 0.7534383535385132, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.14144363341443633, | |
| "grad_norm": 0.6754934191703796, | |
| "learning_rate": 1.8620689655172415e-05, | |
| "loss": 0.8022092580795288, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.14274128142741282, | |
| "grad_norm": 0.6399085521697998, | |
| "learning_rate": 1.8793103448275863e-05, | |
| "loss": 0.8507853746414185, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1440389294403893, | |
| "grad_norm": 0.6910972595214844, | |
| "learning_rate": 1.896551724137931e-05, | |
| "loss": 0.8276559710502625, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.14533657745336578, | |
| "grad_norm": 0.5906772613525391, | |
| "learning_rate": 1.913793103448276e-05, | |
| "loss": 0.7183451056480408, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.14663422546634225, | |
| "grad_norm": 0.6329069137573242, | |
| "learning_rate": 1.931034482758621e-05, | |
| "loss": 0.789232611656189, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.14793187347931874, | |
| "grad_norm": 0.6226819157600403, | |
| "learning_rate": 1.9482758620689658e-05, | |
| "loss": 0.7747266292572021, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.1492295214922952, | |
| "grad_norm": 0.65074223279953, | |
| "learning_rate": 1.9655172413793106e-05, | |
| "loss": 0.753608226776123, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1505271695052717, | |
| "grad_norm": 0.6118033528327942, | |
| "learning_rate": 1.9827586206896554e-05, | |
| "loss": 0.7803196907043457, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.15182481751824817, | |
| "grad_norm": 0.6553196907043457, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8216028213500977, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.15312246553122466, | |
| "grad_norm": 0.678218424320221, | |
| "learning_rate": 1.999998977626552e-05, | |
| "loss": 0.807174801826477, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.15442011354420113, | |
| "grad_norm": 0.6192781329154968, | |
| "learning_rate": 1.999995910508299e-05, | |
| "loss": 0.7289496660232544, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.15571776155717762, | |
| "grad_norm": 0.6038413047790527, | |
| "learning_rate": 1.999990798651512e-05, | |
| "loss": 0.7679600119590759, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.15701540957015409, | |
| "grad_norm": 0.6870720386505127, | |
| "learning_rate": 1.9999836420666438e-05, | |
| "loss": 0.8232643604278564, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.15831305758313058, | |
| "grad_norm": 0.623460590839386, | |
| "learning_rate": 1.999974440768327e-05, | |
| "loss": 0.7480977177619934, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.15961070559610704, | |
| "grad_norm": 0.651508629322052, | |
| "learning_rate": 1.9999631947753776e-05, | |
| "loss": 0.7708613276481628, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.16090835360908354, | |
| "grad_norm": 0.6450805068016052, | |
| "learning_rate": 1.999949904110789e-05, | |
| "loss": 0.8049247860908508, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.16220600162206, | |
| "grad_norm": 0.6157734990119934, | |
| "learning_rate": 1.999934568801738e-05, | |
| "loss": 0.7631984949111938, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1635036496350365, | |
| "grad_norm": 0.6847337484359741, | |
| "learning_rate": 1.999917188879582e-05, | |
| "loss": 0.7424380779266357, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.164801297648013, | |
| "grad_norm": 0.6398855447769165, | |
| "learning_rate": 1.9998977643798572e-05, | |
| "loss": 0.7688143253326416, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.16609894566098946, | |
| "grad_norm": 0.6518498063087463, | |
| "learning_rate": 1.999876295342283e-05, | |
| "loss": 0.7191232442855835, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.16739659367396595, | |
| "grad_norm": 0.6462240219116211, | |
| "learning_rate": 1.9998527818107577e-05, | |
| "loss": 0.7375045418739319, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.16869424168694241, | |
| "grad_norm": 0.6727373600006104, | |
| "learning_rate": 1.9998272238333606e-05, | |
| "loss": 0.7088533639907837, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1699918896999189, | |
| "grad_norm": 0.689372181892395, | |
| "learning_rate": 1.9997996214623515e-05, | |
| "loss": 0.8250190615653992, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.17128953771289537, | |
| "grad_norm": 0.6236900687217712, | |
| "learning_rate": 1.9997699747541698e-05, | |
| "loss": 0.7653014659881592, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.17258718572587187, | |
| "grad_norm": 0.617174506187439, | |
| "learning_rate": 1.9997382837694355e-05, | |
| "loss": 0.7043566703796387, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.17388483373884833, | |
| "grad_norm": 0.6391400694847107, | |
| "learning_rate": 1.999704548572949e-05, | |
| "loss": 0.8009853363037109, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.17518248175182483, | |
| "grad_norm": 0.6218752861022949, | |
| "learning_rate": 1.9996687692336896e-05, | |
| "loss": 0.7598843574523926, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1764801297648013, | |
| "grad_norm": 0.5787500143051147, | |
| "learning_rate": 1.9996309458248184e-05, | |
| "loss": 0.7174202799797058, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 0.6410360932350159, | |
| "learning_rate": 1.999591078423673e-05, | |
| "loss": 0.763797402381897, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.17907542579075425, | |
| "grad_norm": 0.970513641834259, | |
| "learning_rate": 1.9995491671117734e-05, | |
| "loss": 0.6977022290229797, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.18037307380373074, | |
| "grad_norm": 0.6853165030479431, | |
| "learning_rate": 1.999505211974817e-05, | |
| "loss": 0.7822556495666504, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.1816707218167072, | |
| "grad_norm": 0.6396400332450867, | |
| "learning_rate": 1.999459213102681e-05, | |
| "loss": 0.7862622737884521, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1829683698296837, | |
| "grad_norm": 0.6066014766693115, | |
| "learning_rate": 1.9994111705894218e-05, | |
| "loss": 0.8506604433059692, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.18426601784266017, | |
| "grad_norm": 0.6197599172592163, | |
| "learning_rate": 1.9993610845332734e-05, | |
| "loss": 0.7890738844871521, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.18556366585563666, | |
| "grad_norm": 0.6512314677238464, | |
| "learning_rate": 1.99930895503665e-05, | |
| "loss": 0.7983291149139404, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.18686131386861313, | |
| "grad_norm": 0.5899611115455627, | |
| "learning_rate": 1.9992547822061427e-05, | |
| "loss": 0.7357482314109802, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.18815896188158962, | |
| "grad_norm": 0.6489595770835876, | |
| "learning_rate": 1.9991985661525217e-05, | |
| "loss": 0.875076174736023, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.18945660989456609, | |
| "grad_norm": 0.6258020997047424, | |
| "learning_rate": 1.999140306990734e-05, | |
| "loss": 0.7252365350723267, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.19075425790754258, | |
| "grad_norm": 0.6045345067977905, | |
| "learning_rate": 1.999080004839905e-05, | |
| "loss": 0.7721343040466309, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.19205190592051907, | |
| "grad_norm": 0.6506165862083435, | |
| "learning_rate": 1.999017659823338e-05, | |
| "loss": 0.8302021026611328, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.19334955393349554, | |
| "grad_norm": 0.6503569483757019, | |
| "learning_rate": 1.9989532720685115e-05, | |
| "loss": 0.825711190700531, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.19464720194647203, | |
| "grad_norm": 0.5828515887260437, | |
| "learning_rate": 1.998886841707083e-05, | |
| "loss": 0.7742114067077637, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1959448499594485, | |
| "grad_norm": 0.5945319533348083, | |
| "learning_rate": 1.9988183688748862e-05, | |
| "loss": 0.8291171789169312, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.197242497972425, | |
| "grad_norm": 0.6298274993896484, | |
| "learning_rate": 1.9987478537119297e-05, | |
| "loss": 0.8312891721725464, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.19854014598540146, | |
| "grad_norm": 0.6161749958992004, | |
| "learning_rate": 1.9986752963624002e-05, | |
| "loss": 0.8070319890975952, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.19983779399837795, | |
| "grad_norm": 0.6540800929069519, | |
| "learning_rate": 1.998600696974658e-05, | |
| "loss": 0.7966468334197998, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.20113544201135442, | |
| "grad_norm": 0.628194272518158, | |
| "learning_rate": 1.9985240557012406e-05, | |
| "loss": 0.7929773926734924, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.2024330900243309, | |
| "grad_norm": 0.6037770509719849, | |
| "learning_rate": 1.99844537269886e-05, | |
| "loss": 0.6729363203048706, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.20373073803730737, | |
| "grad_norm": 0.6952143907546997, | |
| "learning_rate": 1.9983646481284028e-05, | |
| "loss": 0.8734431266784668, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.20502838605028387, | |
| "grad_norm": 0.6359195113182068, | |
| "learning_rate": 1.9982818821549308e-05, | |
| "loss": 0.7915219664573669, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.20632603406326033, | |
| "grad_norm": 0.578925609588623, | |
| "learning_rate": 1.9981970749476792e-05, | |
| "loss": 0.7327010631561279, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.20762368207623683, | |
| "grad_norm": 0.6001781821250916, | |
| "learning_rate": 1.998110226680057e-05, | |
| "loss": 0.7517937421798706, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2089213300892133, | |
| "grad_norm": 0.6306588649749756, | |
| "learning_rate": 1.9980213375296468e-05, | |
| "loss": 0.7292003035545349, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.21021897810218979, | |
| "grad_norm": 0.5737298130989075, | |
| "learning_rate": 1.997930407678205e-05, | |
| "loss": 0.7056928873062134, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.21151662611516625, | |
| "grad_norm": 0.6045275926589966, | |
| "learning_rate": 1.99783743731166e-05, | |
| "loss": 0.738794207572937, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.21281427412814274, | |
| "grad_norm": 0.6090785264968872, | |
| "learning_rate": 1.9977424266201126e-05, | |
| "loss": 0.8411350846290588, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2141119221411192, | |
| "grad_norm": 0.6489406824111938, | |
| "learning_rate": 1.9976453757978355e-05, | |
| "loss": 0.750893771648407, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2154095701540957, | |
| "grad_norm": 0.5950313210487366, | |
| "learning_rate": 1.997546285043273e-05, | |
| "loss": 0.6694055199623108, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.21670721816707217, | |
| "grad_norm": 0.6618576645851135, | |
| "learning_rate": 1.9974451545590407e-05, | |
| "loss": 0.8072858452796936, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.21800486618004866, | |
| "grad_norm": 0.587589681148529, | |
| "learning_rate": 1.997341984551925e-05, | |
| "loss": 0.7707666158676147, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.21930251419302516, | |
| "grad_norm": 0.6130505204200745, | |
| "learning_rate": 1.9972367752328824e-05, | |
| "loss": 0.683761715888977, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.22060016220600162, | |
| "grad_norm": 0.6129958033561707, | |
| "learning_rate": 1.9971295268170393e-05, | |
| "loss": 0.7264688014984131, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.22189781021897811, | |
| "grad_norm": 0.6114361882209778, | |
| "learning_rate": 1.9970202395236913e-05, | |
| "loss": 0.7344344854354858, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.22319545823195458, | |
| "grad_norm": 0.6653074622154236, | |
| "learning_rate": 1.996908913576304e-05, | |
| "loss": 0.7358161211013794, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.22449310624493107, | |
| "grad_norm": 0.6639219522476196, | |
| "learning_rate": 1.9967955492025094e-05, | |
| "loss": 0.7851651906967163, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.22579075425790754, | |
| "grad_norm": 0.5558881759643555, | |
| "learning_rate": 1.9966801466341107e-05, | |
| "loss": 0.7109513878822327, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.22708840227088403, | |
| "grad_norm": 0.6213382482528687, | |
| "learning_rate": 1.9965627061070755e-05, | |
| "loss": 0.702171802520752, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2283860502838605, | |
| "grad_norm": 0.6152480840682983, | |
| "learning_rate": 1.996443227861541e-05, | |
| "loss": 0.8059327602386475, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.229683698296837, | |
| "grad_norm": 1.3707772493362427, | |
| "learning_rate": 1.996321712141809e-05, | |
| "loss": 0.6749221682548523, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.23098134630981346, | |
| "grad_norm": 0.6016313433647156, | |
| "learning_rate": 1.9961981591963494e-05, | |
| "loss": 0.7931903004646301, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.23227899432278995, | |
| "grad_norm": 0.6266494393348694, | |
| "learning_rate": 1.9960725692777956e-05, | |
| "loss": 0.7843484878540039, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.23357664233576642, | |
| "grad_norm": 0.6365560293197632, | |
| "learning_rate": 1.995944942642948e-05, | |
| "loss": 0.769256055355072, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2348742903487429, | |
| "grad_norm": 0.5864040851593018, | |
| "learning_rate": 1.9958152795527706e-05, | |
| "loss": 0.7252316474914551, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.23617193836171937, | |
| "grad_norm": 0.6339318156242371, | |
| "learning_rate": 1.9956835802723916e-05, | |
| "loss": 0.8299843668937683, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.23746958637469587, | |
| "grad_norm": 0.5974844098091125, | |
| "learning_rate": 1.9955498450711026e-05, | |
| "loss": 0.7282422184944153, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.23876723438767233, | |
| "grad_norm": 0.5841022729873657, | |
| "learning_rate": 1.9954140742223586e-05, | |
| "loss": 0.7407736778259277, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.24006488240064883, | |
| "grad_norm": 0.6066944599151611, | |
| "learning_rate": 1.9952762680037758e-05, | |
| "loss": 0.7745926380157471, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2413625304136253, | |
| "grad_norm": 0.5798110365867615, | |
| "learning_rate": 1.995136426697134e-05, | |
| "loss": 0.7561591863632202, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.24266017842660179, | |
| "grad_norm": 0.5705812573432922, | |
| "learning_rate": 1.9949945505883723e-05, | |
| "loss": 0.7066362500190735, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.24395782643957825, | |
| "grad_norm": 0.6322996020317078, | |
| "learning_rate": 1.994850639967592e-05, | |
| "loss": 0.8032187819480896, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.24525547445255474, | |
| "grad_norm": 0.613441526889801, | |
| "learning_rate": 1.994704695129054e-05, | |
| "loss": 0.75013267993927, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.24655312246553124, | |
| "grad_norm": 0.609327495098114, | |
| "learning_rate": 1.9945567163711788e-05, | |
| "loss": 0.7675092220306396, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2478507704785077, | |
| "grad_norm": 0.6119315028190613, | |
| "learning_rate": 1.9944067039965445e-05, | |
| "loss": 0.7201006412506104, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.2491484184914842, | |
| "grad_norm": 0.5587560534477234, | |
| "learning_rate": 1.9942546583118894e-05, | |
| "loss": 0.7847742438316345, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.25044606650446066, | |
| "grad_norm": 0.5934576988220215, | |
| "learning_rate": 1.994100579628108e-05, | |
| "loss": 0.74636310338974, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.25174371451743716, | |
| "grad_norm": 0.5709709525108337, | |
| "learning_rate": 1.9939444682602522e-05, | |
| "loss": 0.6807436347007751, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.25304136253041365, | |
| "grad_norm": 0.6085708737373352, | |
| "learning_rate": 1.9937863245275303e-05, | |
| "loss": 0.7877497673034668, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2543390105433901, | |
| "grad_norm": 0.5789342522621155, | |
| "learning_rate": 1.9936261487533066e-05, | |
| "loss": 0.7314412593841553, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2556366585563666, | |
| "grad_norm": 0.5808578133583069, | |
| "learning_rate": 1.993463941265099e-05, | |
| "loss": 0.7081149816513062, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2569343065693431, | |
| "grad_norm": 0.5988272428512573, | |
| "learning_rate": 1.993299702394582e-05, | |
| "loss": 0.718379020690918, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.25823195458231957, | |
| "grad_norm": 0.6408476829528809, | |
| "learning_rate": 1.9931334324775817e-05, | |
| "loss": 0.8201683163642883, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.259529602595296, | |
| "grad_norm": 0.582078218460083, | |
| "learning_rate": 1.9929651318540783e-05, | |
| "loss": 0.7401193380355835, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2608272506082725, | |
| "grad_norm": 0.607105553150177, | |
| "learning_rate": 1.9927948008682038e-05, | |
| "loss": 0.74293053150177, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.262124898621249, | |
| "grad_norm": 0.5975603461265564, | |
| "learning_rate": 1.9926224398682424e-05, | |
| "loss": 0.779903769493103, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2634225466342255, | |
| "grad_norm": 0.5534036159515381, | |
| "learning_rate": 1.992448049206628e-05, | |
| "loss": 0.6884838342666626, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2647201946472019, | |
| "grad_norm": 0.610633909702301, | |
| "learning_rate": 1.9922716292399458e-05, | |
| "loss": 0.7174521684646606, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2660178426601784, | |
| "grad_norm": 0.5961881279945374, | |
| "learning_rate": 1.9920931803289302e-05, | |
| "loss": 0.7740389108657837, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2673154906731549, | |
| "grad_norm": 0.5700147747993469, | |
| "learning_rate": 1.9919127028384634e-05, | |
| "loss": 0.7351720333099365, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2686131386861314, | |
| "grad_norm": 0.6236000061035156, | |
| "learning_rate": 1.9917301971375767e-05, | |
| "loss": 0.8022093772888184, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.26991078669910784, | |
| "grad_norm": 0.5870935320854187, | |
| "learning_rate": 1.991545663599448e-05, | |
| "loss": 0.7842336297035217, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.27120843471208433, | |
| "grad_norm": 0.6193575263023376, | |
| "learning_rate": 1.9913591026014016e-05, | |
| "loss": 0.7481486797332764, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.2725060827250608, | |
| "grad_norm": 0.6119521260261536, | |
| "learning_rate": 1.9911705145249076e-05, | |
| "loss": 0.7951152324676514, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2738037307380373, | |
| "grad_norm": 0.5536502599716187, | |
| "learning_rate": 1.9909798997555806e-05, | |
| "loss": 0.790625810623169, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.2751013787510138, | |
| "grad_norm": 0.5879918336868286, | |
| "learning_rate": 1.99078725868318e-05, | |
| "loss": 0.7092885971069336, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.27639902676399025, | |
| "grad_norm": 0.5877639055252075, | |
| "learning_rate": 1.9905925917016077e-05, | |
| "loss": 0.724690318107605, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.27769667477696675, | |
| "grad_norm": 0.5909678339958191, | |
| "learning_rate": 1.9903958992089087e-05, | |
| "loss": 0.7642319202423096, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.27899432278994324, | |
| "grad_norm": 0.5952388644218445, | |
| "learning_rate": 1.990197181607269e-05, | |
| "loss": 0.7681585550308228, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.28029197080291973, | |
| "grad_norm": 0.5698040723800659, | |
| "learning_rate": 1.989996439303016e-05, | |
| "loss": 0.7373849153518677, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.28158961881589617, | |
| "grad_norm": 0.5865874886512756, | |
| "learning_rate": 1.989793672706617e-05, | |
| "loss": 0.7335535287857056, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.28288726682887266, | |
| "grad_norm": 0.6045393943786621, | |
| "learning_rate": 1.9895888822326783e-05, | |
| "loss": 0.7242499589920044, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.28418491484184916, | |
| "grad_norm": 0.6004535555839539, | |
| "learning_rate": 1.9893820682999444e-05, | |
| "loss": 0.7604917287826538, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.28548256285482565, | |
| "grad_norm": 1.119056224822998, | |
| "learning_rate": 1.9891732313312973e-05, | |
| "loss": 0.772226095199585, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2867802108678021, | |
| "grad_norm": 0.5902665853500366, | |
| "learning_rate": 1.9889623717537564e-05, | |
| "loss": 0.7658222317695618, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.2880778588807786, | |
| "grad_norm": 0.6264858245849609, | |
| "learning_rate": 1.9887494899984757e-05, | |
| "loss": 0.7901877760887146, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2893755068937551, | |
| "grad_norm": 0.5469992756843567, | |
| "learning_rate": 1.9885345865007444e-05, | |
| "loss": 0.7618519067764282, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.29067315490673157, | |
| "grad_norm": 0.5550391674041748, | |
| "learning_rate": 1.9883176616999863e-05, | |
| "loss": 0.788576602935791, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.291970802919708, | |
| "grad_norm": 0.5628973245620728, | |
| "learning_rate": 1.9880987160397573e-05, | |
| "loss": 0.718231737613678, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2932684509326845, | |
| "grad_norm": 0.5723385214805603, | |
| "learning_rate": 1.987877749967746e-05, | |
| "loss": 0.698378324508667, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.294566098945661, | |
| "grad_norm": 0.5784431099891663, | |
| "learning_rate": 1.987654763935772e-05, | |
| "loss": 0.7598991990089417, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.2958637469586375, | |
| "grad_norm": 0.5549972653388977, | |
| "learning_rate": 1.9874297583997852e-05, | |
| "loss": 0.7384412288665771, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2971613949716139, | |
| "grad_norm": 0.5789146423339844, | |
| "learning_rate": 1.9872027338198652e-05, | |
| "loss": 0.7528890371322632, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.2984590429845904, | |
| "grad_norm": 0.6021227240562439, | |
| "learning_rate": 1.98697369066022e-05, | |
| "loss": 0.805375337600708, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2984590429845904, | |
| "eval_loss": 0.7241292595863342, | |
| "eval_runtime": 73.217, | |
| "eval_samples_per_second": 70.913, | |
| "eval_steps_per_second": 8.864, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2997566909975669, | |
| "grad_norm": 0.6029407978057861, | |
| "learning_rate": 1.986742629389184e-05, | |
| "loss": 0.7631509900093079, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.3010543390105434, | |
| "grad_norm": 0.5768916606903076, | |
| "learning_rate": 1.98650955047922e-05, | |
| "loss": 0.7468521595001221, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3023519870235199, | |
| "grad_norm": 0.550506055355072, | |
| "learning_rate": 1.9862744544069146e-05, | |
| "loss": 0.7611327767372131, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.30364963503649633, | |
| "grad_norm": 0.5796909332275391, | |
| "learning_rate": 1.9860373416529804e-05, | |
| "loss": 0.7168669700622559, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.30494728304947283, | |
| "grad_norm": 0.8639640808105469, | |
| "learning_rate": 1.9857982127022527e-05, | |
| "loss": 0.7404369115829468, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3062449310624493, | |
| "grad_norm": 0.5862186551094055, | |
| "learning_rate": 1.9855570680436896e-05, | |
| "loss": 0.7222490310668945, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3075425790754258, | |
| "grad_norm": 0.6011035442352295, | |
| "learning_rate": 1.9853139081703712e-05, | |
| "loss": 0.8068719506263733, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.30884022708840225, | |
| "grad_norm": 0.5739139318466187, | |
| "learning_rate": 1.9850687335794974e-05, | |
| "loss": 0.7303578853607178, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.31013787510137875, | |
| "grad_norm": 0.5833807587623596, | |
| "learning_rate": 1.9848215447723888e-05, | |
| "loss": 0.7608842849731445, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.31143552311435524, | |
| "grad_norm": 0.5929459929466248, | |
| "learning_rate": 1.9845723422544834e-05, | |
| "loss": 0.8103141188621521, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.31273317112733173, | |
| "grad_norm": 0.5728944540023804, | |
| "learning_rate": 1.9843211265353376e-05, | |
| "loss": 0.7196205854415894, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.31403081914030817, | |
| "grad_norm": 0.5517752170562744, | |
| "learning_rate": 1.9840678981286237e-05, | |
| "loss": 0.6758772730827332, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.31532846715328466, | |
| "grad_norm": 0.5443773865699768, | |
| "learning_rate": 1.98381265755213e-05, | |
| "loss": 0.6859534978866577, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.31662611516626116, | |
| "grad_norm": 0.5687966346740723, | |
| "learning_rate": 1.9835554053277587e-05, | |
| "loss": 0.7471268177032471, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.31792376317923765, | |
| "grad_norm": 0.5604870319366455, | |
| "learning_rate": 1.9832961419815253e-05, | |
| "loss": 0.6843122839927673, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.3192214111922141, | |
| "grad_norm": 0.5563496351242065, | |
| "learning_rate": 1.983034868043558e-05, | |
| "loss": 0.7023979425430298, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3205190592051906, | |
| "grad_norm": 0.58856201171875, | |
| "learning_rate": 1.9827715840480962e-05, | |
| "loss": 0.826436460018158, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.3218167072181671, | |
| "grad_norm": 0.5512715578079224, | |
| "learning_rate": 1.9825062905334883e-05, | |
| "loss": 0.702526867389679, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.32311435523114357, | |
| "grad_norm": 0.541459858417511, | |
| "learning_rate": 1.9822389880421927e-05, | |
| "loss": 0.7273234128952026, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.32441200324412, | |
| "grad_norm": 0.5705904364585876, | |
| "learning_rate": 1.9819696771207756e-05, | |
| "loss": 0.783245325088501, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3257096512570965, | |
| "grad_norm": 0.5666183829307556, | |
| "learning_rate": 1.981698358319909e-05, | |
| "loss": 0.7261844873428345, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.327007299270073, | |
| "grad_norm": 0.5902214646339417, | |
| "learning_rate": 1.981425032194372e-05, | |
| "loss": 0.7943121194839478, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3283049472830495, | |
| "grad_norm": 0.6048629879951477, | |
| "learning_rate": 1.981149699303047e-05, | |
| "loss": 0.7712939381599426, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.329602595296026, | |
| "grad_norm": 0.5914484858512878, | |
| "learning_rate": 1.9808723602089198e-05, | |
| "loss": 0.7921222448348999, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3309002433090024, | |
| "grad_norm": 0.5761268734931946, | |
| "learning_rate": 1.980593015479079e-05, | |
| "loss": 0.7280013561248779, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.3321978913219789, | |
| "grad_norm": 0.5902722477912903, | |
| "learning_rate": 1.9803116656847136e-05, | |
| "loss": 0.8062602877616882, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3334955393349554, | |
| "grad_norm": 0.5620178580284119, | |
| "learning_rate": 1.9800283114011134e-05, | |
| "loss": 0.7278565168380737, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.3347931873479319, | |
| "grad_norm": 0.5686838626861572, | |
| "learning_rate": 1.9797429532076652e-05, | |
| "loss": 0.7540629506111145, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.33609083536090834, | |
| "grad_norm": 0.5724810361862183, | |
| "learning_rate": 1.9794555916878548e-05, | |
| "loss": 0.8088860511779785, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.33738848337388483, | |
| "grad_norm": 0.5640983581542969, | |
| "learning_rate": 1.9791662274292638e-05, | |
| "loss": 0.7638871669769287, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3386861313868613, | |
| "grad_norm": 0.5784658193588257, | |
| "learning_rate": 1.978874861023569e-05, | |
| "loss": 0.7313830852508545, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.3399837793998378, | |
| "grad_norm": 0.5539552569389343, | |
| "learning_rate": 1.9785814930665404e-05, | |
| "loss": 0.7729085683822632, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.34128142741281425, | |
| "grad_norm": 0.561370849609375, | |
| "learning_rate": 1.9782861241580417e-05, | |
| "loss": 0.6871550679206848, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.34257907542579075, | |
| "grad_norm": 0.5643728375434875, | |
| "learning_rate": 1.9779887549020273e-05, | |
| "loss": 0.7683601379394531, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.34387672343876724, | |
| "grad_norm": 0.5431486964225769, | |
| "learning_rate": 1.9776893859065424e-05, | |
| "loss": 0.7228385210037231, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.34517437145174373, | |
| "grad_norm": 0.5863342881202698, | |
| "learning_rate": 1.9773880177837202e-05, | |
| "loss": 0.7906335592269897, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.34647201946472017, | |
| "grad_norm": 0.5614317655563354, | |
| "learning_rate": 1.9770846511497833e-05, | |
| "loss": 0.7299401164054871, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.34776966747769666, | |
| "grad_norm": 0.5694175958633423, | |
| "learning_rate": 1.9767792866250386e-05, | |
| "loss": 0.7474102973937988, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.34906731549067316, | |
| "grad_norm": 0.5707114934921265, | |
| "learning_rate": 1.97647192483388e-05, | |
| "loss": 0.7324154376983643, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.35036496350364965, | |
| "grad_norm": 0.5364754796028137, | |
| "learning_rate": 1.976162566404784e-05, | |
| "loss": 0.6927608251571655, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3516626115166261, | |
| "grad_norm": 0.6064906120300293, | |
| "learning_rate": 1.9758512119703106e-05, | |
| "loss": 0.7652560472488403, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.3529602595296026, | |
| "grad_norm": 0.5919526815414429, | |
| "learning_rate": 1.9755378621671006e-05, | |
| "loss": 0.7977138757705688, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.3542579075425791, | |
| "grad_norm": 0.567382276058197, | |
| "learning_rate": 1.9752225176358757e-05, | |
| "loss": 0.7258316278457642, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": 0.5770947337150574, | |
| "learning_rate": 1.974905179021435e-05, | |
| "loss": 0.7411879301071167, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.35685320356853206, | |
| "grad_norm": 0.5705130100250244, | |
| "learning_rate": 1.9745858469726555e-05, | |
| "loss": 0.7439219951629639, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3581508515815085, | |
| "grad_norm": 0.5373214483261108, | |
| "learning_rate": 1.9742645221424905e-05, | |
| "loss": 0.6836246252059937, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.359448499594485, | |
| "grad_norm": 0.596576988697052, | |
| "learning_rate": 1.9739412051879686e-05, | |
| "loss": 0.6741154789924622, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3607461476074615, | |
| "grad_norm": 0.5719678997993469, | |
| "learning_rate": 1.973615896770191e-05, | |
| "loss": 0.7447401881217957, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.362043795620438, | |
| "grad_norm": 0.5882077813148499, | |
| "learning_rate": 1.97328859755433e-05, | |
| "loss": 0.7762616872787476, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.3633414436334144, | |
| "grad_norm": 0.6879026293754578, | |
| "learning_rate": 1.972959308209631e-05, | |
| "loss": 0.7956463098526001, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3646390916463909, | |
| "grad_norm": 0.5789086222648621, | |
| "learning_rate": 1.9726280294094067e-05, | |
| "loss": 0.7541590929031372, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.3659367396593674, | |
| "grad_norm": 0.5802841186523438, | |
| "learning_rate": 1.9722947618310384e-05, | |
| "loss": 0.7047423124313354, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3672343876723439, | |
| "grad_norm": 0.5507220029830933, | |
| "learning_rate": 1.9719595061559742e-05, | |
| "loss": 0.6714630722999573, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.36853203568532034, | |
| "grad_norm": 0.5980960726737976, | |
| "learning_rate": 1.9716222630697266e-05, | |
| "loss": 0.7872920036315918, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.36982968369829683, | |
| "grad_norm": 0.5855656266212463, | |
| "learning_rate": 1.971283033261873e-05, | |
| "loss": 0.7662516832351685, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3711273317112733, | |
| "grad_norm": 0.5851466655731201, | |
| "learning_rate": 1.9709418174260523e-05, | |
| "loss": 0.7596746683120728, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3724249797242498, | |
| "grad_norm": 0.5843831300735474, | |
| "learning_rate": 1.9705986162599642e-05, | |
| "loss": 0.7550405263900757, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.37372262773722625, | |
| "grad_norm": 0.5846932530403137, | |
| "learning_rate": 1.9702534304653685e-05, | |
| "loss": 0.7254443764686584, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.37502027575020275, | |
| "grad_norm": 0.6054766774177551, | |
| "learning_rate": 1.9699062607480827e-05, | |
| "loss": 0.7600511908531189, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.37631792376317924, | |
| "grad_norm": 0.5703001618385315, | |
| "learning_rate": 1.969557107817981e-05, | |
| "loss": 0.7401167750358582, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.37761557177615573, | |
| "grad_norm": 0.5855723023414612, | |
| "learning_rate": 1.9692059723889927e-05, | |
| "loss": 0.7476931214332581, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.37891321978913217, | |
| "grad_norm": 0.5804258584976196, | |
| "learning_rate": 1.968852855179101e-05, | |
| "loss": 0.7656409740447998, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.38021086780210867, | |
| "grad_norm": 0.5795084834098816, | |
| "learning_rate": 1.9684977569103415e-05, | |
| "loss": 0.7599056959152222, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.38150851581508516, | |
| "grad_norm": 0.5684756636619568, | |
| "learning_rate": 1.9681406783087998e-05, | |
| "loss": 0.674816370010376, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.38280616382806165, | |
| "grad_norm": 0.5463794469833374, | |
| "learning_rate": 1.9677816201046113e-05, | |
| "loss": 0.683580219745636, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.38410381184103815, | |
| "grad_norm": 0.5722465515136719, | |
| "learning_rate": 1.9674205830319594e-05, | |
| "loss": 0.693361222743988, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3854014598540146, | |
| "grad_norm": 0.6253486275672913, | |
| "learning_rate": 1.9670575678290732e-05, | |
| "loss": 0.7917322516441345, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.3866991078669911, | |
| "grad_norm": 0.5660127401351929, | |
| "learning_rate": 1.9666925752382275e-05, | |
| "loss": 0.7436933517456055, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.38799675587996757, | |
| "grad_norm": 0.572499692440033, | |
| "learning_rate": 1.9663256060057395e-05, | |
| "loss": 0.6714681386947632, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.38929440389294406, | |
| "grad_norm": 0.5779220461845398, | |
| "learning_rate": 1.9659566608819677e-05, | |
| "loss": 0.7252252697944641, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3905920519059205, | |
| "grad_norm": 0.5990428924560547, | |
| "learning_rate": 1.9655857406213124e-05, | |
| "loss": 0.7827754020690918, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.391889699918897, | |
| "grad_norm": 0.5721242427825928, | |
| "learning_rate": 1.9652128459822113e-05, | |
| "loss": 0.7102577686309814, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3931873479318735, | |
| "grad_norm": 0.5870105028152466, | |
| "learning_rate": 1.9648379777271397e-05, | |
| "loss": 0.683538019657135, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.39448499594485, | |
| "grad_norm": 0.5920274257659912, | |
| "learning_rate": 1.964461136622608e-05, | |
| "loss": 0.7541404366493225, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3957826439578264, | |
| "grad_norm": 0.5439295768737793, | |
| "learning_rate": 1.9640823234391614e-05, | |
| "loss": 0.675430417060852, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3970802919708029, | |
| "grad_norm": 0.6126630902290344, | |
| "learning_rate": 1.9637015389513765e-05, | |
| "loss": 0.7898478507995605, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3983779399837794, | |
| "grad_norm": 0.5664204359054565, | |
| "learning_rate": 1.963318783937861e-05, | |
| "loss": 0.6964154839515686, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3996755879967559, | |
| "grad_norm": 0.5839046239852905, | |
| "learning_rate": 1.962934059181253e-05, | |
| "loss": 0.7421650886535645, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.40097323600973234, | |
| "grad_norm": 0.6044719815254211, | |
| "learning_rate": 1.962547365468216e-05, | |
| "loss": 0.7794229984283447, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.40227088402270883, | |
| "grad_norm": 0.5989699363708496, | |
| "learning_rate": 1.962158703589442e-05, | |
| "loss": 0.6963369846343994, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4035685320356853, | |
| "grad_norm": 0.5891120433807373, | |
| "learning_rate": 1.9617680743396452e-05, | |
| "loss": 0.7737009525299072, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.4048661800486618, | |
| "grad_norm": 0.5753238201141357, | |
| "learning_rate": 1.961375478517564e-05, | |
| "loss": 0.6912685632705688, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.40616382806163825, | |
| "grad_norm": 0.6656221747398376, | |
| "learning_rate": 1.9609809169259573e-05, | |
| "loss": 0.7757899165153503, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.40746147607461475, | |
| "grad_norm": 0.6444079875946045, | |
| "learning_rate": 1.960584390371604e-05, | |
| "loss": 0.7399554252624512, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.40875912408759124, | |
| "grad_norm": 0.5455271601676941, | |
| "learning_rate": 1.9601858996653004e-05, | |
| "loss": 0.7261430025100708, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.41005677210056773, | |
| "grad_norm": 0.5660345554351807, | |
| "learning_rate": 1.9597854456218588e-05, | |
| "loss": 0.7287646532058716, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.41135442011354423, | |
| "grad_norm": 0.5909862518310547, | |
| "learning_rate": 1.9593830290601067e-05, | |
| "loss": 0.7831040620803833, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.41265206812652067, | |
| "grad_norm": 0.5852524638175964, | |
| "learning_rate": 1.9589786508028842e-05, | |
| "loss": 0.7229428291320801, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.41394971613949716, | |
| "grad_norm": 0.5916611552238464, | |
| "learning_rate": 1.9585723116770425e-05, | |
| "loss": 0.7438414692878723, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.41524736415247365, | |
| "grad_norm": 0.5859969854354858, | |
| "learning_rate": 1.9581640125134415e-05, | |
| "loss": 0.7692857384681702, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.41654501216545015, | |
| "grad_norm": 0.5748182535171509, | |
| "learning_rate": 1.9577537541469506e-05, | |
| "loss": 0.7208437919616699, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.4178426601784266, | |
| "grad_norm": 0.5739149451255798, | |
| "learning_rate": 1.957341537416444e-05, | |
| "loss": 0.6877571940422058, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.4191403081914031, | |
| "grad_norm": 0.6014899611473083, | |
| "learning_rate": 1.9569273631648005e-05, | |
| "loss": 0.7482254505157471, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.42043795620437957, | |
| "grad_norm": 0.5997340679168701, | |
| "learning_rate": 1.9565112322389017e-05, | |
| "loss": 0.735174298286438, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.42173560421735606, | |
| "grad_norm": 0.572567343711853, | |
| "learning_rate": 1.95609314548963e-05, | |
| "loss": 0.7159808874130249, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.4230332522303325, | |
| "grad_norm": 0.5567170977592468, | |
| "learning_rate": 1.955673103771867e-05, | |
| "loss": 0.6460487842559814, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.424330900243309, | |
| "grad_norm": 0.570945143699646, | |
| "learning_rate": 1.9552511079444914e-05, | |
| "loss": 0.780687689781189, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.4256285482562855, | |
| "grad_norm": 0.5721143484115601, | |
| "learning_rate": 1.9548271588703783e-05, | |
| "loss": 0.7781848907470703, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.426926196269262, | |
| "grad_norm": 0.5866307616233826, | |
| "learning_rate": 1.954401257416396e-05, | |
| "loss": 0.6634104251861572, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.4282238442822384, | |
| "grad_norm": 0.575668215751648, | |
| "learning_rate": 1.9539734044534057e-05, | |
| "loss": 0.7831740379333496, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4295214922952149, | |
| "grad_norm": 0.5764342546463013, | |
| "learning_rate": 1.9535436008562576e-05, | |
| "loss": 0.7253679037094116, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.4308191403081914, | |
| "grad_norm": 0.5597108006477356, | |
| "learning_rate": 1.9531118475037916e-05, | |
| "loss": 0.6709398627281189, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4321167883211679, | |
| "grad_norm": 0.595028817653656, | |
| "learning_rate": 1.9526781452788342e-05, | |
| "loss": 0.7365997433662415, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.43341443633414434, | |
| "grad_norm": 0.5742825865745544, | |
| "learning_rate": 1.9522424950681964e-05, | |
| "loss": 0.7389061450958252, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.43471208434712083, | |
| "grad_norm": 0.55686354637146, | |
| "learning_rate": 1.951804897762673e-05, | |
| "loss": 0.6932294964790344, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.4360097323600973, | |
| "grad_norm": 0.6195898652076721, | |
| "learning_rate": 1.951365354257039e-05, | |
| "loss": 0.689919114112854, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4373073803730738, | |
| "grad_norm": 0.5357776284217834, | |
| "learning_rate": 1.9509238654500505e-05, | |
| "loss": 0.6890056133270264, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.4386050283860503, | |
| "grad_norm": 0.563254177570343, | |
| "learning_rate": 1.95048043224444e-05, | |
| "loss": 0.7118027806282043, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.43990267639902675, | |
| "grad_norm": 0.5649257302284241, | |
| "learning_rate": 1.9500350555469164e-05, | |
| "loss": 0.7314987182617188, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.44120032441200324, | |
| "grad_norm": 0.5675091743469238, | |
| "learning_rate": 1.9495877362681613e-05, | |
| "loss": 0.6302130222320557, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.44249797242497974, | |
| "grad_norm": 0.5489922761917114, | |
| "learning_rate": 1.9491384753228308e-05, | |
| "loss": 0.7357535362243652, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.44379562043795623, | |
| "grad_norm": 0.5530965924263, | |
| "learning_rate": 1.948687273629549e-05, | |
| "loss": 0.6449010372161865, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.44509326845093267, | |
| "grad_norm": 0.5747541189193726, | |
| "learning_rate": 1.9482341321109096e-05, | |
| "loss": 0.7252374887466431, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.44639091646390916, | |
| "grad_norm": 0.5609497427940369, | |
| "learning_rate": 1.947779051693472e-05, | |
| "loss": 0.7096484899520874, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.44768856447688565, | |
| "grad_norm": 0.5988261699676514, | |
| "learning_rate": 1.9473220333077604e-05, | |
| "loss": 0.7986630201339722, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.44898621248986215, | |
| "grad_norm": 0.6313751935958862, | |
| "learning_rate": 1.946863077888262e-05, | |
| "loss": 0.8356250524520874, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.4502838605028386, | |
| "grad_norm": 0.565196692943573, | |
| "learning_rate": 1.946402186373424e-05, | |
| "loss": 0.7527079582214355, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.4515815085158151, | |
| "grad_norm": 0.5944785475730896, | |
| "learning_rate": 1.9459393597056536e-05, | |
| "loss": 0.6996445655822754, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.45287915652879157, | |
| "grad_norm": 0.5384091734886169, | |
| "learning_rate": 1.9454745988313135e-05, | |
| "loss": 0.7005808353424072, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.45417680454176806, | |
| "grad_norm": 0.5926419496536255, | |
| "learning_rate": 1.945007904700723e-05, | |
| "loss": 0.7360185384750366, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4554744525547445, | |
| "grad_norm": 0.5517107844352722, | |
| "learning_rate": 1.9445392782681523e-05, | |
| "loss": 0.6678152084350586, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.456772100567721, | |
| "grad_norm": 0.5527735352516174, | |
| "learning_rate": 1.9440687204918245e-05, | |
| "loss": 0.719680666923523, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.4580697485806975, | |
| "grad_norm": 0.5603200793266296, | |
| "learning_rate": 1.943596232333911e-05, | |
| "loss": 0.7023108005523682, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.459367396593674, | |
| "grad_norm": 0.5883275866508484, | |
| "learning_rate": 1.9431218147605307e-05, | |
| "loss": 0.7870659232139587, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4606650446066504, | |
| "grad_norm": 0.5547419786453247, | |
| "learning_rate": 1.9426454687417474e-05, | |
| "loss": 0.693616509437561, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.4619626926196269, | |
| "grad_norm": 0.5387628674507141, | |
| "learning_rate": 1.942167195251568e-05, | |
| "loss": 0.6275761127471924, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4632603406326034, | |
| "grad_norm": 0.5728762745857239, | |
| "learning_rate": 1.941686995267941e-05, | |
| "loss": 0.7649428844451904, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.4645579886455799, | |
| "grad_norm": 0.5744031667709351, | |
| "learning_rate": 1.941204869772753e-05, | |
| "loss": 0.746831476688385, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.4658556366585564, | |
| "grad_norm": 0.5453589558601379, | |
| "learning_rate": 1.9407208197518296e-05, | |
| "loss": 0.7251806259155273, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.46715328467153283, | |
| "grad_norm": 0.5643113851547241, | |
| "learning_rate": 1.94023484619493e-05, | |
| "loss": 0.6882834434509277, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4684509326845093, | |
| "grad_norm": 0.5984339714050293, | |
| "learning_rate": 1.9397469500957478e-05, | |
| "loss": 0.7512071132659912, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.4697485806974858, | |
| "grad_norm": 0.5487557649612427, | |
| "learning_rate": 1.939257132451906e-05, | |
| "loss": 0.7803584337234497, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4710462287104623, | |
| "grad_norm": 0.5798037648200989, | |
| "learning_rate": 1.9387653942649586e-05, | |
| "loss": 0.7196419835090637, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.47234387672343875, | |
| "grad_norm": 0.5554172396659851, | |
| "learning_rate": 1.9382717365403854e-05, | |
| "loss": 0.7393349409103394, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.47364152473641524, | |
| "grad_norm": 0.546137273311615, | |
| "learning_rate": 1.9377761602875913e-05, | |
| "loss": 0.7212686538696289, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.47493917274939174, | |
| "grad_norm": 0.5687487125396729, | |
| "learning_rate": 1.937278666519905e-05, | |
| "loss": 0.7769354581832886, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.47623682076236823, | |
| "grad_norm": 0.5400050282478333, | |
| "learning_rate": 1.9367792562545744e-05, | |
| "loss": 0.721081018447876, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.47753446877534467, | |
| "grad_norm": 0.5545980930328369, | |
| "learning_rate": 1.9362779305127674e-05, | |
| "loss": 0.6797982454299927, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.47883211678832116, | |
| "grad_norm": 0.5371907949447632, | |
| "learning_rate": 1.9357746903195686e-05, | |
| "loss": 0.7223237752914429, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.48012976480129765, | |
| "grad_norm": 0.534491240978241, | |
| "learning_rate": 1.9352695367039764e-05, | |
| "loss": 0.7010591626167297, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.48142741281427415, | |
| "grad_norm": 0.5431662797927856, | |
| "learning_rate": 1.9347624706989026e-05, | |
| "loss": 0.7298872470855713, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.4827250608272506, | |
| "grad_norm": 0.5843503475189209, | |
| "learning_rate": 1.9342534933411683e-05, | |
| "loss": 0.7810012698173523, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.4840227088402271, | |
| "grad_norm": 0.5278732776641846, | |
| "learning_rate": 1.9337426056715036e-05, | |
| "loss": 0.7204632759094238, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.48532035685320357, | |
| "grad_norm": 0.5900875926017761, | |
| "learning_rate": 1.9332298087345447e-05, | |
| "loss": 0.7081923484802246, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.48661800486618007, | |
| "grad_norm": 0.5549632906913757, | |
| "learning_rate": 1.932715103578831e-05, | |
| "loss": 0.7588300704956055, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4879156528791565, | |
| "grad_norm": 0.5351032018661499, | |
| "learning_rate": 1.9321984912568048e-05, | |
| "loss": 0.6380345821380615, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.489213300892133, | |
| "grad_norm": 0.5553699135780334, | |
| "learning_rate": 1.9316799728248074e-05, | |
| "loss": 0.7115924954414368, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4905109489051095, | |
| "grad_norm": 0.5904532670974731, | |
| "learning_rate": 1.9311595493430776e-05, | |
| "loss": 0.7918650507926941, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.491808596918086, | |
| "grad_norm": 0.5718861818313599, | |
| "learning_rate": 1.93063722187575e-05, | |
| "loss": 0.7574873566627502, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4931062449310625, | |
| "grad_norm": 0.5575288534164429, | |
| "learning_rate": 1.9301129914908516e-05, | |
| "loss": 0.7619529962539673, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4944038929440389, | |
| "grad_norm": 0.5972062945365906, | |
| "learning_rate": 1.9295868592603012e-05, | |
| "loss": 0.8739205598831177, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.4957015409570154, | |
| "grad_norm": 0.5725207328796387, | |
| "learning_rate": 1.929058826259906e-05, | |
| "loss": 0.7461530566215515, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.4969991889699919, | |
| "grad_norm": 0.7559300065040588, | |
| "learning_rate": 1.9285288935693597e-05, | |
| "loss": 0.7054376602172852, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.4982968369829684, | |
| "grad_norm": 0.5533690452575684, | |
| "learning_rate": 1.9279970622722403e-05, | |
| "loss": 0.742769718170166, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.49959448499594483, | |
| "grad_norm": 0.5702188014984131, | |
| "learning_rate": 1.927463333456009e-05, | |
| "loss": 0.7912020683288574, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5008921330089213, | |
| "grad_norm": 0.5261266231536865, | |
| "learning_rate": 1.9269277082120053e-05, | |
| "loss": 0.7539711594581604, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5021897810218978, | |
| "grad_norm": 0.5590584874153137, | |
| "learning_rate": 1.926390187635448e-05, | |
| "loss": 0.7646081447601318, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.5034874290348743, | |
| "grad_norm": 0.5796819925308228, | |
| "learning_rate": 1.92585077282543e-05, | |
| "loss": 0.7352266907691956, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5047850770478508, | |
| "grad_norm": 0.5712133049964905, | |
| "learning_rate": 1.9253094648849183e-05, | |
| "loss": 0.7203606367111206, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.5060827250608273, | |
| "grad_norm": 0.597654402256012, | |
| "learning_rate": 1.924766264920751e-05, | |
| "loss": 0.8121019601821899, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5073803730738037, | |
| "grad_norm": 0.5626549124717712, | |
| "learning_rate": 1.9242211740436335e-05, | |
| "loss": 0.7297658920288086, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.5086780210867802, | |
| "grad_norm": 0.6014045476913452, | |
| "learning_rate": 1.9236741933681396e-05, | |
| "loss": 0.7325990200042725, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5099756690997567, | |
| "grad_norm": 0.5554893612861633, | |
| "learning_rate": 1.9231253240127062e-05, | |
| "loss": 0.680641770362854, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.5112733171127332, | |
| "grad_norm": 0.5787703394889832, | |
| "learning_rate": 1.922574567099632e-05, | |
| "loss": 0.7252123355865479, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5125709651257097, | |
| "grad_norm": 0.5811824798583984, | |
| "learning_rate": 1.9220219237550757e-05, | |
| "loss": 0.7139418125152588, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5138686131386861, | |
| "grad_norm": 0.547007143497467, | |
| "learning_rate": 1.921467395109053e-05, | |
| "loss": 0.6985068917274475, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5151662611516626, | |
| "grad_norm": 0.6072813272476196, | |
| "learning_rate": 1.9209109822954345e-05, | |
| "loss": 0.7519763708114624, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.5164639091646391, | |
| "grad_norm": 0.5965511798858643, | |
| "learning_rate": 1.9203526864519432e-05, | |
| "loss": 0.7568516135215759, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5177615571776155, | |
| "grad_norm": 0.5627179741859436, | |
| "learning_rate": 1.919792508720154e-05, | |
| "loss": 0.7021974921226501, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.519059205190592, | |
| "grad_norm": 0.5491631627082825, | |
| "learning_rate": 1.9192304502454876e-05, | |
| "loss": 0.6992515325546265, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5203568532035685, | |
| "grad_norm": 0.5874002575874329, | |
| "learning_rate": 1.918666512177211e-05, | |
| "loss": 0.712739109992981, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.521654501216545, | |
| "grad_norm": 0.5660138726234436, | |
| "learning_rate": 1.918100695668436e-05, | |
| "loss": 0.6854047775268555, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5229521492295215, | |
| "grad_norm": 0.565985381603241, | |
| "learning_rate": 1.917533001876113e-05, | |
| "loss": 0.7300174236297607, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.524249797242498, | |
| "grad_norm": 0.5489518642425537, | |
| "learning_rate": 1.916963431961033e-05, | |
| "loss": 0.7667282819747925, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5255474452554745, | |
| "grad_norm": 0.569230318069458, | |
| "learning_rate": 1.916391987087822e-05, | |
| "loss": 0.7247310876846313, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.526845093268451, | |
| "grad_norm": 0.5969386696815491, | |
| "learning_rate": 1.9158186684249397e-05, | |
| "loss": 0.7719178199768066, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5281427412814275, | |
| "grad_norm": 0.5550801157951355, | |
| "learning_rate": 1.9152434771446783e-05, | |
| "loss": 0.6853774785995483, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.5294403892944038, | |
| "grad_norm": 0.5440778136253357, | |
| "learning_rate": 1.914666414423158e-05, | |
| "loss": 0.681282639503479, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5307380373073803, | |
| "grad_norm": 0.5368308424949646, | |
| "learning_rate": 1.914087481440326e-05, | |
| "loss": 0.7318757772445679, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.5320356853203568, | |
| "grad_norm": 0.6122865676879883, | |
| "learning_rate": 1.9135066793799538e-05, | |
| "loss": 0.6974803805351257, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 0.5386953353881836, | |
| "learning_rate": 1.912924009429635e-05, | |
| "loss": 0.7397326827049255, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.5346309813463098, | |
| "grad_norm": 0.5616509914398193, | |
| "learning_rate": 1.9123394727807816e-05, | |
| "loss": 0.7613886594772339, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5359286293592863, | |
| "grad_norm": 0.627604067325592, | |
| "learning_rate": 1.9117530706286232e-05, | |
| "loss": 0.7783684730529785, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.5372262773722628, | |
| "grad_norm": 0.5613445043563843, | |
| "learning_rate": 1.9111648041722044e-05, | |
| "loss": 0.7296919226646423, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5385239253852393, | |
| "grad_norm": 0.5356356501579285, | |
| "learning_rate": 1.91057467461438e-05, | |
| "loss": 0.7119168639183044, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.5398215733982157, | |
| "grad_norm": 0.5709317326545715, | |
| "learning_rate": 1.9099826831618168e-05, | |
| "loss": 0.6891450881958008, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5411192214111922, | |
| "grad_norm": 0.5525058507919312, | |
| "learning_rate": 1.909388831024987e-05, | |
| "loss": 0.7220831513404846, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.5424168694241687, | |
| "grad_norm": 0.5916740894317627, | |
| "learning_rate": 1.908793119418168e-05, | |
| "loss": 0.7380563020706177, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5437145174371452, | |
| "grad_norm": 0.5553448796272278, | |
| "learning_rate": 1.9081955495594388e-05, | |
| "loss": 0.6854832172393799, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.5450121654501217, | |
| "grad_norm": 0.550918459892273, | |
| "learning_rate": 1.9075961226706784e-05, | |
| "loss": 0.755254864692688, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5463098134630981, | |
| "grad_norm": 0.5704249739646912, | |
| "learning_rate": 1.906994839977564e-05, | |
| "loss": 0.762306272983551, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.5476074614760746, | |
| "grad_norm": 0.5444906949996948, | |
| "learning_rate": 1.9063917027095664e-05, | |
| "loss": 0.7424022555351257, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5489051094890511, | |
| "grad_norm": 0.5842110514640808, | |
| "learning_rate": 1.905786712099948e-05, | |
| "loss": 0.7851117849349976, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.5502027575020276, | |
| "grad_norm": 0.5527293086051941, | |
| "learning_rate": 1.9051798693857617e-05, | |
| "loss": 0.7389935255050659, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.551500405515004, | |
| "grad_norm": 0.5890975594520569, | |
| "learning_rate": 1.904571175807848e-05, | |
| "loss": 0.7679333686828613, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.5527980535279805, | |
| "grad_norm": 0.5342135429382324, | |
| "learning_rate": 1.9039606326108297e-05, | |
| "loss": 0.7123668193817139, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.554095701540957, | |
| "grad_norm": 0.5628570914268494, | |
| "learning_rate": 1.903348241043114e-05, | |
| "loss": 0.7286348342895508, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.5553933495539335, | |
| "grad_norm": 0.5398725867271423, | |
| "learning_rate": 1.902734002356887e-05, | |
| "loss": 0.7192749977111816, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.55669099756691, | |
| "grad_norm": 0.5142056941986084, | |
| "learning_rate": 1.9021179178081107e-05, | |
| "loss": 0.6286910772323608, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.5579886455798865, | |
| "grad_norm": 0.5470032095909119, | |
| "learning_rate": 1.9014999886565226e-05, | |
| "loss": 0.6505739092826843, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.559286293592863, | |
| "grad_norm": 0.5600834488868713, | |
| "learning_rate": 1.9008802161656308e-05, | |
| "loss": 0.7014046907424927, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.5605839416058395, | |
| "grad_norm": 0.5533670783042908, | |
| "learning_rate": 1.9002586016027136e-05, | |
| "loss": 0.7095932364463806, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5618815896188158, | |
| "grad_norm": 0.5443385243415833, | |
| "learning_rate": 1.8996351462388153e-05, | |
| "loss": 0.7492538094520569, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.5631792376317923, | |
| "grad_norm": 0.5775622129440308, | |
| "learning_rate": 1.8990098513487447e-05, | |
| "loss": 0.7882871627807617, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5644768856447688, | |
| "grad_norm": 0.5645557045936584, | |
| "learning_rate": 1.898382718211071e-05, | |
| "loss": 0.6681729555130005, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.5657745336577453, | |
| "grad_norm": 0.562117874622345, | |
| "learning_rate": 1.897753748108123e-05, | |
| "loss": 0.7754248380661011, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5670721816707218, | |
| "grad_norm": 0.5395199656486511, | |
| "learning_rate": 1.8971229423259855e-05, | |
| "loss": 0.6584359407424927, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.5683698296836983, | |
| "grad_norm": 0.5511093735694885, | |
| "learning_rate": 1.8964903021544964e-05, | |
| "loss": 0.7121752500534058, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5696674776966748, | |
| "grad_norm": 0.5518468022346497, | |
| "learning_rate": 1.895855828887245e-05, | |
| "loss": 0.7533795237541199, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.5709651257096513, | |
| "grad_norm": 0.541132926940918, | |
| "learning_rate": 1.895219523821568e-05, | |
| "loss": 0.6961894035339355, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5722627737226277, | |
| "grad_norm": 0.5566806197166443, | |
| "learning_rate": 1.894581388258549e-05, | |
| "loss": 0.7168055176734924, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.5735604217356042, | |
| "grad_norm": 0.8438438773155212, | |
| "learning_rate": 1.8939414235030137e-05, | |
| "loss": 0.7322010397911072, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5748580697485807, | |
| "grad_norm": 0.5508759617805481, | |
| "learning_rate": 1.893299630863527e-05, | |
| "loss": 0.689163327217102, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.5761557177615572, | |
| "grad_norm": 0.577190637588501, | |
| "learning_rate": 1.892656011652393e-05, | |
| "loss": 0.7421369552612305, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5774533657745337, | |
| "grad_norm": 0.5557067394256592, | |
| "learning_rate": 1.8920105671856507e-05, | |
| "loss": 0.6984370350837708, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.5787510137875101, | |
| "grad_norm": 0.5880769491195679, | |
| "learning_rate": 1.89136329878307e-05, | |
| "loss": 0.6648968458175659, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5800486618004866, | |
| "grad_norm": 0.5225708484649658, | |
| "learning_rate": 1.890714207768151e-05, | |
| "loss": 0.6399903297424316, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.5813463098134631, | |
| "grad_norm": 2.8270366191864014, | |
| "learning_rate": 1.8900632954681203e-05, | |
| "loss": 0.7426702380180359, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5826439578264396, | |
| "grad_norm": 0.5743777751922607, | |
| "learning_rate": 1.8894105632139296e-05, | |
| "loss": 0.7008408308029175, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 0.5848923325538635, | |
| "learning_rate": 1.8887560123402505e-05, | |
| "loss": 0.7745944261550903, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5852392538523925, | |
| "grad_norm": 0.5533474087715149, | |
| "learning_rate": 1.888099644185474e-05, | |
| "loss": 0.7078051567077637, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.586536901865369, | |
| "grad_norm": 0.5359990000724792, | |
| "learning_rate": 1.887441460091707e-05, | |
| "loss": 0.7025009393692017, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5878345498783455, | |
| "grad_norm": 0.5772839784622192, | |
| "learning_rate": 1.886781461404769e-05, | |
| "loss": 0.7109262347221375, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.589132197891322, | |
| "grad_norm": 0.5491592288017273, | |
| "learning_rate": 1.886119649474191e-05, | |
| "loss": 0.6828133463859558, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5904298459042985, | |
| "grad_norm": 0.5495162606239319, | |
| "learning_rate": 1.8854560256532098e-05, | |
| "loss": 0.6600109338760376, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.591727493917275, | |
| "grad_norm": 0.5773736238479614, | |
| "learning_rate": 1.8847905912987693e-05, | |
| "loss": 0.6746517419815063, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5930251419302515, | |
| "grad_norm": 0.5658586621284485, | |
| "learning_rate": 1.8841233477715136e-05, | |
| "loss": 0.6905688047409058, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5943227899432278, | |
| "grad_norm": 0.544463574886322, | |
| "learning_rate": 1.8834542964357875e-05, | |
| "loss": 0.7656948566436768, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5956204379562043, | |
| "grad_norm": 0.5466704964637756, | |
| "learning_rate": 1.8827834386596306e-05, | |
| "loss": 0.7320756912231445, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5969180859691808, | |
| "grad_norm": 0.534042477607727, | |
| "learning_rate": 1.882110775814778e-05, | |
| "loss": 0.6747853755950928, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5969180859691808, | |
| "eval_loss": 0.7028419375419617, | |
| "eval_runtime": 72.8032, | |
| "eval_samples_per_second": 71.316, | |
| "eval_steps_per_second": 8.914, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5982157339821573, | |
| "grad_norm": 0.5617560148239136, | |
| "learning_rate": 1.881436309276655e-05, | |
| "loss": 0.7175489068031311, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5995133819951338, | |
| "grad_norm": 0.538003146648407, | |
| "learning_rate": 1.8807600404243746e-05, | |
| "loss": 0.6772977709770203, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6008110300081103, | |
| "grad_norm": 0.5164902210235596, | |
| "learning_rate": 1.8800819706407355e-05, | |
| "loss": 0.7026697397232056, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.6021086780210868, | |
| "grad_norm": 0.519985556602478, | |
| "learning_rate": 1.879402101312219e-05, | |
| "loss": 0.6459539532661438, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.6034063260340633, | |
| "grad_norm": 0.5643022060394287, | |
| "learning_rate": 1.8787204338289858e-05, | |
| "loss": 0.7304619550704956, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.6047039740470398, | |
| "grad_norm": 0.5315333604812622, | |
| "learning_rate": 1.8780369695848733e-05, | |
| "loss": 0.7055330872535706, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6060016220600162, | |
| "grad_norm": 0.5695874691009521, | |
| "learning_rate": 1.8773517099773927e-05, | |
| "loss": 0.7567015290260315, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.6072992700729927, | |
| "grad_norm": 0.5361006259918213, | |
| "learning_rate": 1.8766646564077265e-05, | |
| "loss": 0.7254809141159058, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6085969180859692, | |
| "grad_norm": 0.5438353419303894, | |
| "learning_rate": 1.8759758102807253e-05, | |
| "loss": 0.6743266582489014, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6098945660989457, | |
| "grad_norm": 0.5824978351593018, | |
| "learning_rate": 1.8752851730049055e-05, | |
| "loss": 0.7623616456985474, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6111922141119221, | |
| "grad_norm": 0.546610951423645, | |
| "learning_rate": 1.8745927459924454e-05, | |
| "loss": 0.809882640838623, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.6124898621248986, | |
| "grad_norm": 0.5459777116775513, | |
| "learning_rate": 1.8738985306591826e-05, | |
| "loss": 0.6817529201507568, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6137875101378751, | |
| "grad_norm": 0.5381180644035339, | |
| "learning_rate": 1.8732025284246122e-05, | |
| "loss": 0.7059892416000366, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.6150851581508516, | |
| "grad_norm": 0.5245769023895264, | |
| "learning_rate": 1.8725047407118823e-05, | |
| "loss": 0.7031271457672119, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.616382806163828, | |
| "grad_norm": 0.5284971594810486, | |
| "learning_rate": 1.8718051689477923e-05, | |
| "loss": 0.7379744052886963, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.6176804541768045, | |
| "grad_norm": 0.5659690499305725, | |
| "learning_rate": 1.8711038145627893e-05, | |
| "loss": 0.7798171639442444, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.618978102189781, | |
| "grad_norm": 0.5460679531097412, | |
| "learning_rate": 1.8704006789909654e-05, | |
| "loss": 0.7433549165725708, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.6202757502027575, | |
| "grad_norm": 0.5171265602111816, | |
| "learning_rate": 1.8696957636700555e-05, | |
| "loss": 0.7264508008956909, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.621573398215734, | |
| "grad_norm": 0.5979129672050476, | |
| "learning_rate": 1.868989070041432e-05, | |
| "loss": 0.7511105537414551, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.6228710462287105, | |
| "grad_norm": 0.5520970225334167, | |
| "learning_rate": 1.8682805995501052e-05, | |
| "loss": 0.6946426630020142, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.624168694241687, | |
| "grad_norm": 0.5510658025741577, | |
| "learning_rate": 1.8675703536447178e-05, | |
| "loss": 0.7265397310256958, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.6254663422546635, | |
| "grad_norm": 0.5842864513397217, | |
| "learning_rate": 1.866858333777543e-05, | |
| "loss": 0.7219571471214294, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.6267639902676398, | |
| "grad_norm": 0.5430331826210022, | |
| "learning_rate": 1.8661445414044813e-05, | |
| "loss": 0.7292179465293884, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.6280616382806163, | |
| "grad_norm": 0.5456423759460449, | |
| "learning_rate": 1.865428977985057e-05, | |
| "loss": 0.7341865301132202, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6293592862935928, | |
| "grad_norm": 0.55687415599823, | |
| "learning_rate": 1.8647116449824165e-05, | |
| "loss": 0.7712036371231079, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.6306569343065693, | |
| "grad_norm": 0.574967622756958, | |
| "learning_rate": 1.8639925438633243e-05, | |
| "loss": 0.7341934442520142, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.6319545823195458, | |
| "grad_norm": 0.575878381729126, | |
| "learning_rate": 1.86327167609816e-05, | |
| "loss": 0.6782741546630859, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.6332522303325223, | |
| "grad_norm": 0.5638167858123779, | |
| "learning_rate": 1.8625490431609154e-05, | |
| "loss": 0.8088809251785278, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.6345498783454988, | |
| "grad_norm": 0.547574520111084, | |
| "learning_rate": 1.8618246465291925e-05, | |
| "loss": 0.7108902335166931, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.6358475263584753, | |
| "grad_norm": 0.5785483121871948, | |
| "learning_rate": 1.861098487684199e-05, | |
| "loss": 0.6963984370231628, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6371451743714518, | |
| "grad_norm": 0.547226071357727, | |
| "learning_rate": 1.8603705681107456e-05, | |
| "loss": 0.6772190928459167, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.6384428223844282, | |
| "grad_norm": 0.5494422912597656, | |
| "learning_rate": 1.8596408892972442e-05, | |
| "loss": 0.7243861556053162, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6397404703974047, | |
| "grad_norm": 0.5267540216445923, | |
| "learning_rate": 1.858909452735703e-05, | |
| "loss": 0.6649144887924194, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.6410381184103812, | |
| "grad_norm": 0.5952751636505127, | |
| "learning_rate": 1.858176259921724e-05, | |
| "loss": 0.7574429512023926, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6423357664233577, | |
| "grad_norm": 0.5476658344268799, | |
| "learning_rate": 1.857441312354502e-05, | |
| "loss": 0.6968377828598022, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.6436334144363342, | |
| "grad_norm": 0.5507075786590576, | |
| "learning_rate": 1.856704611536818e-05, | |
| "loss": 0.7353919744491577, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.6449310624493106, | |
| "grad_norm": 0.5495625734329224, | |
| "learning_rate": 1.8559661589750387e-05, | |
| "loss": 0.7162117958068848, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.6462287104622871, | |
| "grad_norm": 0.5721608996391296, | |
| "learning_rate": 1.8552259561791133e-05, | |
| "loss": 0.6986855268478394, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6475263584752636, | |
| "grad_norm": 0.5700922608375549, | |
| "learning_rate": 1.8544840046625686e-05, | |
| "loss": 0.8195285797119141, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.64882400648824, | |
| "grad_norm": 0.5746553540229797, | |
| "learning_rate": 1.8537403059425082e-05, | |
| "loss": 0.7492556571960449, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6501216545012165, | |
| "grad_norm": 0.5598172545433044, | |
| "learning_rate": 1.852994861539607e-05, | |
| "loss": 0.6921173930168152, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.651419302514193, | |
| "grad_norm": 0.5589975714683533, | |
| "learning_rate": 1.8522476729781106e-05, | |
| "loss": 0.7157631516456604, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6527169505271695, | |
| "grad_norm": 0.5745802521705627, | |
| "learning_rate": 1.8514987417858306e-05, | |
| "loss": 0.7679554224014282, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.654014598540146, | |
| "grad_norm": 0.581063449382782, | |
| "learning_rate": 1.8507480694941416e-05, | |
| "loss": 0.7761994004249573, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6553122465531225, | |
| "grad_norm": 0.5932230353355408, | |
| "learning_rate": 1.849995657637978e-05, | |
| "loss": 0.748866081237793, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.656609894566099, | |
| "grad_norm": 0.5524072647094727, | |
| "learning_rate": 1.8492415077558325e-05, | |
| "loss": 0.7764031887054443, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6579075425790755, | |
| "grad_norm": 0.5266931653022766, | |
| "learning_rate": 1.8484856213897496e-05, | |
| "loss": 0.7512728571891785, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.659205190592052, | |
| "grad_norm": 0.5363677740097046, | |
| "learning_rate": 1.847728000085327e-05, | |
| "loss": 0.7477032542228699, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6605028386050283, | |
| "grad_norm": 0.5348376035690308, | |
| "learning_rate": 1.8469686453917074e-05, | |
| "loss": 0.6908712387084961, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.6618004866180048, | |
| "grad_norm": 0.5489766597747803, | |
| "learning_rate": 1.846207558861579e-05, | |
| "loss": 0.7576340436935425, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6630981346309813, | |
| "grad_norm": 0.5426369309425354, | |
| "learning_rate": 1.845444742051172e-05, | |
| "loss": 0.7107582092285156, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.6643957826439578, | |
| "grad_norm": 0.5308833718299866, | |
| "learning_rate": 1.8446801965202524e-05, | |
| "loss": 0.6590298414230347, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6656934306569343, | |
| "grad_norm": 0.5621533989906311, | |
| "learning_rate": 1.8439139238321235e-05, | |
| "loss": 0.7291080355644226, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.6669910786699108, | |
| "grad_norm": 0.5651385188102722, | |
| "learning_rate": 1.8431459255536185e-05, | |
| "loss": 0.7855580449104309, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.6682887266828873, | |
| "grad_norm": 0.5611156225204468, | |
| "learning_rate": 1.8423762032551e-05, | |
| "loss": 0.6918215751647949, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.6695863746958638, | |
| "grad_norm": 0.5477362275123596, | |
| "learning_rate": 1.841604758510454e-05, | |
| "loss": 0.7025431394577026, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6708840227088402, | |
| "grad_norm": 0.5612704753875732, | |
| "learning_rate": 1.840831592897091e-05, | |
| "loss": 0.7540648579597473, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.6721816707218167, | |
| "grad_norm": 0.5650063753128052, | |
| "learning_rate": 1.8400567079959383e-05, | |
| "loss": 0.7409968376159668, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6734793187347932, | |
| "grad_norm": 0.5648168921470642, | |
| "learning_rate": 1.8392801053914396e-05, | |
| "loss": 0.754462718963623, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.6747769667477697, | |
| "grad_norm": 0.5603179931640625, | |
| "learning_rate": 1.8385017866715507e-05, | |
| "loss": 0.7388665080070496, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6760746147607462, | |
| "grad_norm": 0.5628640651702881, | |
| "learning_rate": 1.8377217534277365e-05, | |
| "loss": 0.7781612873077393, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.6773722627737226, | |
| "grad_norm": 0.593789279460907, | |
| "learning_rate": 1.8369400072549674e-05, | |
| "loss": 0.753161609172821, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6786699107866991, | |
| "grad_norm": 0.5755636096000671, | |
| "learning_rate": 1.8361565497517166e-05, | |
| "loss": 0.7570379972457886, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.6799675587996756, | |
| "grad_norm": 0.5607541799545288, | |
| "learning_rate": 1.835371382519956e-05, | |
| "loss": 0.777469277381897, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.681265206812652, | |
| "grad_norm": 0.4994042217731476, | |
| "learning_rate": 1.8345845071651543e-05, | |
| "loss": 0.6544281840324402, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.6825628548256285, | |
| "grad_norm": 0.5685398578643799, | |
| "learning_rate": 1.8337959252962728e-05, | |
| "loss": 0.7024877071380615, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.683860502838605, | |
| "grad_norm": 0.5343568325042725, | |
| "learning_rate": 1.8330056385257607e-05, | |
| "loss": 0.7003896832466125, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.6851581508515815, | |
| "grad_norm": 0.5208355188369751, | |
| "learning_rate": 1.8322136484695553e-05, | |
| "loss": 0.6797738075256348, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.686455798864558, | |
| "grad_norm": 0.5621144771575928, | |
| "learning_rate": 1.8314199567470755e-05, | |
| "loss": 0.6609838008880615, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.6877534468775345, | |
| "grad_norm": 0.577298104763031, | |
| "learning_rate": 1.83062456498122e-05, | |
| "loss": 0.711292028427124, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.689051094890511, | |
| "grad_norm": 0.5840193629264832, | |
| "learning_rate": 1.8298274747983638e-05, | |
| "loss": 0.7950271368026733, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.6903487429034875, | |
| "grad_norm": 0.5348870158195496, | |
| "learning_rate": 1.8290286878283542e-05, | |
| "loss": 0.6982176303863525, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.691646390916464, | |
| "grad_norm": 0.5467864871025085, | |
| "learning_rate": 1.8282282057045087e-05, | |
| "loss": 0.7555949687957764, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.6929440389294403, | |
| "grad_norm": 0.5581674575805664, | |
| "learning_rate": 1.827426030063611e-05, | |
| "loss": 0.6723984479904175, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6942416869424168, | |
| "grad_norm": 0.5615087151527405, | |
| "learning_rate": 1.8266221625459064e-05, | |
| "loss": 0.7201924324035645, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.6955393349553933, | |
| "grad_norm": 0.5710893273353577, | |
| "learning_rate": 1.825816604795101e-05, | |
| "loss": 0.7096928358078003, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6968369829683698, | |
| "grad_norm": 0.5586241483688354, | |
| "learning_rate": 1.8250093584583567e-05, | |
| "loss": 0.7197962999343872, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.6981346309813463, | |
| "grad_norm": 0.5536755323410034, | |
| "learning_rate": 1.8242004251862872e-05, | |
| "loss": 0.678354799747467, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6994322789943228, | |
| "grad_norm": 0.5744696855545044, | |
| "learning_rate": 1.823389806632957e-05, | |
| "loss": 0.7439010739326477, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.7007299270072993, | |
| "grad_norm": 0.5338960886001587, | |
| "learning_rate": 1.8225775044558757e-05, | |
| "loss": 0.731925904750824, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7020275750202758, | |
| "grad_norm": 0.5696558356285095, | |
| "learning_rate": 1.8217635203159957e-05, | |
| "loss": 0.7480655312538147, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.7033252230332522, | |
| "grad_norm": 0.5994415283203125, | |
| "learning_rate": 1.8209478558777084e-05, | |
| "loss": 0.776438295841217, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.7046228710462287, | |
| "grad_norm": 0.578956127166748, | |
| "learning_rate": 1.8201305128088412e-05, | |
| "loss": 0.7190870046615601, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.7059205190592052, | |
| "grad_norm": 0.557142972946167, | |
| "learning_rate": 1.819311492780654e-05, | |
| "loss": 0.7524915933609009, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7072181670721817, | |
| "grad_norm": 0.5244631171226501, | |
| "learning_rate": 1.8184907974678348e-05, | |
| "loss": 0.6941534876823425, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.7085158150851582, | |
| "grad_norm": 0.5301777720451355, | |
| "learning_rate": 1.8176684285484985e-05, | |
| "loss": 0.7010957598686218, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.7098134630981346, | |
| "grad_norm": 0.5309736728668213, | |
| "learning_rate": 1.816844387704181e-05, | |
| "loss": 0.6693360209465027, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.7111111111111111, | |
| "grad_norm": 0.5188398361206055, | |
| "learning_rate": 1.8160186766198375e-05, | |
| "loss": 0.7254098057746887, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.7124087591240876, | |
| "grad_norm": 0.5340986847877502, | |
| "learning_rate": 1.815191296983838e-05, | |
| "loss": 0.7227193713188171, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.7137064071370641, | |
| "grad_norm": 0.5604742765426636, | |
| "learning_rate": 1.8143622504879647e-05, | |
| "loss": 0.6893896460533142, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7150040551500405, | |
| "grad_norm": 0.5265613794326782, | |
| "learning_rate": 1.8135315388274075e-05, | |
| "loss": 0.7178789377212524, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.716301703163017, | |
| "grad_norm": 0.5819421410560608, | |
| "learning_rate": 1.8126991637007618e-05, | |
| "loss": 0.7809138298034668, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7175993511759935, | |
| "grad_norm": 0.5548515915870667, | |
| "learning_rate": 1.8118651268100235e-05, | |
| "loss": 0.7398655414581299, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.71889699918897, | |
| "grad_norm": 0.5281164050102234, | |
| "learning_rate": 1.811029429860588e-05, | |
| "loss": 0.7255332469940186, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.7201946472019465, | |
| "grad_norm": 0.51970374584198, | |
| "learning_rate": 1.810192074561243e-05, | |
| "loss": 0.6958039999008179, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.721492295214923, | |
| "grad_norm": 0.5574509501457214, | |
| "learning_rate": 1.8093530626241684e-05, | |
| "loss": 0.77367103099823, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.7227899432278995, | |
| "grad_norm": 0.5539534687995911, | |
| "learning_rate": 1.8085123957649315e-05, | |
| "loss": 0.7615116834640503, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.724087591240876, | |
| "grad_norm": 0.549517035484314, | |
| "learning_rate": 1.8076700757024833e-05, | |
| "loss": 0.777897834777832, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.7253852392538523, | |
| "grad_norm": 0.5480270981788635, | |
| "learning_rate": 1.8068261041591548e-05, | |
| "loss": 0.7139554619789124, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.7266828872668288, | |
| "grad_norm": 0.5337988138198853, | |
| "learning_rate": 1.8059804828606545e-05, | |
| "loss": 0.7470839023590088, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7279805352798053, | |
| "grad_norm": 0.5055403709411621, | |
| "learning_rate": 1.8051332135360637e-05, | |
| "loss": 0.6575566530227661, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.7292781832927818, | |
| "grad_norm": 0.5452354550361633, | |
| "learning_rate": 1.8042842979178338e-05, | |
| "loss": 0.7080937623977661, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.7305758313057583, | |
| "grad_norm": 0.5276215672492981, | |
| "learning_rate": 1.8034337377417826e-05, | |
| "loss": 0.6609282493591309, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.7318734793187348, | |
| "grad_norm": 0.5823485851287842, | |
| "learning_rate": 1.80258153474709e-05, | |
| "loss": 0.7274823784828186, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.7331711273317113, | |
| "grad_norm": 0.5385794043540955, | |
| "learning_rate": 1.8017276906762955e-05, | |
| "loss": 0.6209210157394409, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.7344687753446878, | |
| "grad_norm": 0.6051076054573059, | |
| "learning_rate": 1.8008722072752943e-05, | |
| "loss": 0.7948423624038696, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.7357664233576642, | |
| "grad_norm": 0.8337801098823547, | |
| "learning_rate": 1.8000150862933335e-05, | |
| "loss": 0.7299556732177734, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.7370640713706407, | |
| "grad_norm": 0.5429887771606445, | |
| "learning_rate": 1.7991563294830083e-05, | |
| "loss": 0.686081051826477, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.7383617193836172, | |
| "grad_norm": 0.5419583916664124, | |
| "learning_rate": 1.7982959386002592e-05, | |
| "loss": 0.7415616512298584, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.7396593673965937, | |
| "grad_norm": 0.5454174280166626, | |
| "learning_rate": 1.7974339154043677e-05, | |
| "loss": 0.7275187969207764, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.7409570154095702, | |
| "grad_norm": 0.5611673593521118, | |
| "learning_rate": 1.796570261657953e-05, | |
| "loss": 0.7872575521469116, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.7422546634225466, | |
| "grad_norm": 0.5598644018173218, | |
| "learning_rate": 1.7957049791269684e-05, | |
| "loss": 0.7327409982681274, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7435523114355231, | |
| "grad_norm": 0.558341920375824, | |
| "learning_rate": 1.7948380695806983e-05, | |
| "loss": 0.711640477180481, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.7448499594484996, | |
| "grad_norm": 0.5189648270606995, | |
| "learning_rate": 1.793969534791752e-05, | |
| "loss": 0.6593164801597595, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7461476074614761, | |
| "grad_norm": 0.5739206671714783, | |
| "learning_rate": 1.7930993765360644e-05, | |
| "loss": 0.775146484375, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.7474452554744525, | |
| "grad_norm": 0.5306016802787781, | |
| "learning_rate": 1.792227596592889e-05, | |
| "loss": 0.6946839094161987, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.748742903487429, | |
| "grad_norm": 0.5487167835235596, | |
| "learning_rate": 1.791354196744794e-05, | |
| "loss": 0.7318082451820374, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.7500405515004055, | |
| "grad_norm": 0.5554513931274414, | |
| "learning_rate": 1.790479178777662e-05, | |
| "loss": 0.727341890335083, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.751338199513382, | |
| "grad_norm": 0.5512000918388367, | |
| "learning_rate": 1.7896025444806834e-05, | |
| "loss": 0.7673891186714172, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.7526358475263585, | |
| "grad_norm": 0.5614628195762634, | |
| "learning_rate": 1.7887242956463528e-05, | |
| "loss": 0.7410103678703308, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.753933495539335, | |
| "grad_norm": 0.5414284467697144, | |
| "learning_rate": 1.7878444340704666e-05, | |
| "loss": 0.7189674377441406, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.7552311435523115, | |
| "grad_norm": 0.5145770311355591, | |
| "learning_rate": 1.78696296155212e-05, | |
| "loss": 0.6776304244995117, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.756528791565288, | |
| "grad_norm": 0.5401176810264587, | |
| "learning_rate": 1.7860798798937e-05, | |
| "loss": 0.6960833072662354, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.7578264395782643, | |
| "grad_norm": 0.5560998916625977, | |
| "learning_rate": 1.7851951909008864e-05, | |
| "loss": 0.6736742258071899, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7591240875912408, | |
| "grad_norm": 0.5505719780921936, | |
| "learning_rate": 1.7843088963826437e-05, | |
| "loss": 0.6757134795188904, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.7604217356042173, | |
| "grad_norm": 0.5717475414276123, | |
| "learning_rate": 1.783420998151219e-05, | |
| "loss": 0.7612842321395874, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.7617193836171938, | |
| "grad_norm": 0.5554843544960022, | |
| "learning_rate": 1.782531498022141e-05, | |
| "loss": 0.705300509929657, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.7630170316301703, | |
| "grad_norm": 0.5320503115653992, | |
| "learning_rate": 1.781640397814211e-05, | |
| "loss": 0.7508092522621155, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7643146796431468, | |
| "grad_norm": 0.5554909706115723, | |
| "learning_rate": 1.7807476993495047e-05, | |
| "loss": 0.7732164859771729, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.7656123276561233, | |
| "grad_norm": 0.5467298030853271, | |
| "learning_rate": 1.779853404453363e-05, | |
| "loss": 0.7246618270874023, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7669099756690998, | |
| "grad_norm": 0.5365788340568542, | |
| "learning_rate": 1.7789575149543936e-05, | |
| "loss": 0.6982936263084412, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.7682076236820763, | |
| "grad_norm": 0.5504671931266785, | |
| "learning_rate": 1.7780600326844638e-05, | |
| "loss": 0.7263147830963135, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7695052716950527, | |
| "grad_norm": 0.549707293510437, | |
| "learning_rate": 1.7771609594786968e-05, | |
| "loss": 0.7235106229782104, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.7708029197080292, | |
| "grad_norm": 0.5401800274848938, | |
| "learning_rate": 1.776260297175471e-05, | |
| "loss": 0.7632750272750854, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7721005677210057, | |
| "grad_norm": 0.5245280265808105, | |
| "learning_rate": 1.775358047616412e-05, | |
| "loss": 0.6609013080596924, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.7733982157339822, | |
| "grad_norm": 0.5566380023956299, | |
| "learning_rate": 1.774454212646392e-05, | |
| "loss": 0.7397713661193848, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7746958637469586, | |
| "grad_norm": 0.5788303017616272, | |
| "learning_rate": 1.773548794113525e-05, | |
| "loss": 0.6708486676216125, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.7759935117599351, | |
| "grad_norm": 0.5494595170021057, | |
| "learning_rate": 1.772641793869162e-05, | |
| "loss": 0.7761523723602295, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7772911597729116, | |
| "grad_norm": 0.5339208245277405, | |
| "learning_rate": 1.7717332137678895e-05, | |
| "loss": 0.6619516611099243, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.7785888077858881, | |
| "grad_norm": 0.5362167358398438, | |
| "learning_rate": 1.770823055667524e-05, | |
| "loss": 0.7144718170166016, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7798864557988645, | |
| "grad_norm": 0.5141735076904297, | |
| "learning_rate": 1.7699113214291082e-05, | |
| "loss": 0.6293293237686157, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.781184103811841, | |
| "grad_norm": 0.5582875609397888, | |
| "learning_rate": 1.768998012916908e-05, | |
| "loss": 0.7720483541488647, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7824817518248175, | |
| "grad_norm": 0.5367119312286377, | |
| "learning_rate": 1.7680831319984077e-05, | |
| "loss": 0.705078661441803, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.783779399837794, | |
| "grad_norm": 0.5382807850837708, | |
| "learning_rate": 1.7671666805443076e-05, | |
| "loss": 0.7088773846626282, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7850770478507705, | |
| "grad_norm": 0.5625648498535156, | |
| "learning_rate": 1.766248660428519e-05, | |
| "loss": 0.7392460703849792, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.786374695863747, | |
| "grad_norm": 0.5586503744125366, | |
| "learning_rate": 1.7653290735281605e-05, | |
| "loss": 0.7484114170074463, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7876723438767235, | |
| "grad_norm": 0.5572494864463806, | |
| "learning_rate": 1.7644079217235547e-05, | |
| "loss": 0.7409180402755737, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.7889699918897, | |
| "grad_norm": 0.5369569659233093, | |
| "learning_rate": 1.763485206898224e-05, | |
| "loss": 0.6471737027168274, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7902676399026763, | |
| "grad_norm": 0.5504409074783325, | |
| "learning_rate": 1.762560930938886e-05, | |
| "loss": 0.7778940200805664, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.7915652879156528, | |
| "grad_norm": 0.5358904600143433, | |
| "learning_rate": 1.7616350957354523e-05, | |
| "loss": 0.694309413433075, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7928629359286293, | |
| "grad_norm": 0.5360654592514038, | |
| "learning_rate": 1.7607077031810204e-05, | |
| "loss": 0.6945086717605591, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.7941605839416058, | |
| "grad_norm": 0.535325825214386, | |
| "learning_rate": 1.759778755171874e-05, | |
| "loss": 0.7578423619270325, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7954582319545823, | |
| "grad_norm": 0.5466883182525635, | |
| "learning_rate": 1.758848253607476e-05, | |
| "loss": 0.7157893180847168, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.7967558799675588, | |
| "grad_norm": 0.5534203052520752, | |
| "learning_rate": 1.7579162003904678e-05, | |
| "loss": 0.7312074303627014, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7980535279805353, | |
| "grad_norm": 0.5488491654396057, | |
| "learning_rate": 1.756982597426661e-05, | |
| "loss": 0.7318480014801025, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.7993511759935118, | |
| "grad_norm": 0.5375532507896423, | |
| "learning_rate": 1.756047446625038e-05, | |
| "loss": 0.7143536806106567, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.8006488240064883, | |
| "grad_norm": 0.5791228413581848, | |
| "learning_rate": 1.7551107498977458e-05, | |
| "loss": 0.642976701259613, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.8019464720194647, | |
| "grad_norm": 0.5346726179122925, | |
| "learning_rate": 1.7541725091600918e-05, | |
| "loss": 0.687232255935669, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.8032441200324412, | |
| "grad_norm": 0.5417895913124084, | |
| "learning_rate": 1.7532327263305405e-05, | |
| "loss": 0.7081488370895386, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.8045417680454177, | |
| "grad_norm": 0.5509006381034851, | |
| "learning_rate": 1.75229140333071e-05, | |
| "loss": 0.7728561162948608, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8058394160583942, | |
| "grad_norm": 0.5634705424308777, | |
| "learning_rate": 1.7513485420853683e-05, | |
| "loss": 0.6951034069061279, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.8071370640713706, | |
| "grad_norm": 0.5197573900222778, | |
| "learning_rate": 1.750404144522427e-05, | |
| "loss": 0.7106211185455322, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.8084347120843471, | |
| "grad_norm": 0.5803437232971191, | |
| "learning_rate": 1.7494582125729408e-05, | |
| "loss": 0.7436937689781189, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.8097323600973236, | |
| "grad_norm": 0.541920006275177, | |
| "learning_rate": 1.7485107481711014e-05, | |
| "loss": 0.6682834029197693, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8110300081103001, | |
| "grad_norm": 0.561758279800415, | |
| "learning_rate": 1.7475617532542325e-05, | |
| "loss": 0.6873137950897217, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.8123276561232765, | |
| "grad_norm": 0.5416638255119324, | |
| "learning_rate": 1.7466112297627894e-05, | |
| "loss": 0.7167541980743408, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.813625304136253, | |
| "grad_norm": 0.5338025093078613, | |
| "learning_rate": 1.7456591796403525e-05, | |
| "loss": 0.7321476340293884, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.8149229521492295, | |
| "grad_norm": 0.5378256440162659, | |
| "learning_rate": 1.744705604833622e-05, | |
| "loss": 0.6663627624511719, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.816220600162206, | |
| "grad_norm": 0.581386387348175, | |
| "learning_rate": 1.7437505072924177e-05, | |
| "loss": 0.755516767501831, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.8175182481751825, | |
| "grad_norm": 0.581896185874939, | |
| "learning_rate": 1.742793888969673e-05, | |
| "loss": 0.7974879145622253, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.818815896188159, | |
| "grad_norm": 0.521468460559845, | |
| "learning_rate": 1.741835751821429e-05, | |
| "loss": 0.7400495409965515, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.8201135442011355, | |
| "grad_norm": 0.5232843160629272, | |
| "learning_rate": 1.7408760978068343e-05, | |
| "loss": 0.6786386966705322, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.821411192214112, | |
| "grad_norm": 0.5813708901405334, | |
| "learning_rate": 1.739914928888139e-05, | |
| "loss": 0.7453535199165344, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.8227088402270885, | |
| "grad_norm": 0.5424124002456665, | |
| "learning_rate": 1.7389522470306892e-05, | |
| "loss": 0.7520110607147217, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.8240064882400648, | |
| "grad_norm": 0.5089052319526672, | |
| "learning_rate": 1.7379880542029263e-05, | |
| "loss": 0.7197295427322388, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.8253041362530413, | |
| "grad_norm": 0.5367469191551208, | |
| "learning_rate": 1.7370223523763804e-05, | |
| "loss": 0.7498934864997864, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.8266017842660178, | |
| "grad_norm": 0.5291455388069153, | |
| "learning_rate": 1.7360551435256673e-05, | |
| "loss": 0.7376183867454529, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.8278994322789943, | |
| "grad_norm": 0.5446896553039551, | |
| "learning_rate": 1.7350864296284846e-05, | |
| "loss": 0.735445499420166, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.8291970802919708, | |
| "grad_norm": 0.5124339461326599, | |
| "learning_rate": 1.7341162126656063e-05, | |
| "loss": 0.6861530542373657, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.8304947283049473, | |
| "grad_norm": 0.5077775120735168, | |
| "learning_rate": 1.7331444946208815e-05, | |
| "loss": 0.688785195350647, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8317923763179238, | |
| "grad_norm": 0.5058798789978027, | |
| "learning_rate": 1.732171277481227e-05, | |
| "loss": 0.7133075594902039, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.8330900243309003, | |
| "grad_norm": 0.5404756665229797, | |
| "learning_rate": 1.7311965632366254e-05, | |
| "loss": 0.7240495681762695, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.8343876723438767, | |
| "grad_norm": 0.5313534736633301, | |
| "learning_rate": 1.7302203538801212e-05, | |
| "loss": 0.71756911277771, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.8356853203568532, | |
| "grad_norm": 0.5360015630722046, | |
| "learning_rate": 1.729242651407815e-05, | |
| "loss": 0.7652734518051147, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8369829683698297, | |
| "grad_norm": 0.540046751499176, | |
| "learning_rate": 1.7282634578188612e-05, | |
| "loss": 0.7294871807098389, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.8382806163828062, | |
| "grad_norm": 0.5653432607650757, | |
| "learning_rate": 1.7272827751154627e-05, | |
| "loss": 0.7391757965087891, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.8395782643957826, | |
| "grad_norm": 0.5427312850952148, | |
| "learning_rate": 1.7263006053028674e-05, | |
| "loss": 0.6798534393310547, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.8408759124087591, | |
| "grad_norm": 0.539861261844635, | |
| "learning_rate": 1.7253169503893637e-05, | |
| "loss": 0.7292792201042175, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.8421735604217356, | |
| "grad_norm": 0.5300166010856628, | |
| "learning_rate": 1.7243318123862777e-05, | |
| "loss": 0.7026904821395874, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.8434712084347121, | |
| "grad_norm": 0.5242528319358826, | |
| "learning_rate": 1.7233451933079663e-05, | |
| "loss": 0.6926451921463013, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8447688564476885, | |
| "grad_norm": 0.5352111458778381, | |
| "learning_rate": 1.7223570951718166e-05, | |
| "loss": 0.7006164789199829, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.846066504460665, | |
| "grad_norm": 0.5747525095939636, | |
| "learning_rate": 1.7213675199982388e-05, | |
| "loss": 0.7685414552688599, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.8473641524736415, | |
| "grad_norm": 0.5309545397758484, | |
| "learning_rate": 1.7203764698106636e-05, | |
| "loss": 0.7312856912612915, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.848661800486618, | |
| "grad_norm": 0.5124905705451965, | |
| "learning_rate": 1.7193839466355383e-05, | |
| "loss": 0.6484863758087158, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.8499594484995945, | |
| "grad_norm": 0.5323530435562134, | |
| "learning_rate": 1.7183899525023212e-05, | |
| "loss": 0.694681704044342, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.851257096512571, | |
| "grad_norm": 0.5242999792098999, | |
| "learning_rate": 1.7173944894434783e-05, | |
| "loss": 0.6672481298446655, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8525547445255475, | |
| "grad_norm": 0.5519501566886902, | |
| "learning_rate": 1.7163975594944807e-05, | |
| "loss": 0.7557801604270935, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.853852392538524, | |
| "grad_norm": 0.5345069169998169, | |
| "learning_rate": 1.715399164693797e-05, | |
| "loss": 0.7127410173416138, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8551500405515005, | |
| "grad_norm": 0.5087319016456604, | |
| "learning_rate": 1.7143993070828913e-05, | |
| "loss": 0.6801098585128784, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.8564476885644768, | |
| "grad_norm": 0.546444833278656, | |
| "learning_rate": 1.713397988706221e-05, | |
| "loss": 0.7135753631591797, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8577453365774533, | |
| "grad_norm": 0.5438613891601562, | |
| "learning_rate": 1.7123952116112275e-05, | |
| "loss": 0.7199326753616333, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.8590429845904298, | |
| "grad_norm": 0.5320620536804199, | |
| "learning_rate": 1.7113909778483364e-05, | |
| "loss": 0.7263282537460327, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.8603406326034063, | |
| "grad_norm": 0.5496207475662231, | |
| "learning_rate": 1.7103852894709517e-05, | |
| "loss": 0.6767710447311401, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.8616382806163828, | |
| "grad_norm": 0.5515886545181274, | |
| "learning_rate": 1.7093781485354517e-05, | |
| "loss": 0.666580319404602, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8629359286293593, | |
| "grad_norm": 0.5425974130630493, | |
| "learning_rate": 1.7083695571011842e-05, | |
| "loss": 0.7289122343063354, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.8642335766423358, | |
| "grad_norm": 0.5263716578483582, | |
| "learning_rate": 1.707359517230464e-05, | |
| "loss": 0.6910987496376038, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8655312246553123, | |
| "grad_norm": 0.525571346282959, | |
| "learning_rate": 1.7063480309885668e-05, | |
| "loss": 0.6733009815216064, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.8668288726682887, | |
| "grad_norm": 0.5529440641403198, | |
| "learning_rate": 1.7053351004437258e-05, | |
| "loss": 0.6993213295936584, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.8681265206812652, | |
| "grad_norm": 0.5263779163360596, | |
| "learning_rate": 1.7043207276671276e-05, | |
| "loss": 0.7125247120857239, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.8694241686942417, | |
| "grad_norm": 0.5178059935569763, | |
| "learning_rate": 1.7033049147329077e-05, | |
| "loss": 0.7389542460441589, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8707218167072182, | |
| "grad_norm": 0.5027527809143066, | |
| "learning_rate": 1.702287663718147e-05, | |
| "loss": 0.6378510594367981, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.8720194647201946, | |
| "grad_norm": 0.5320873260498047, | |
| "learning_rate": 1.7012689767028656e-05, | |
| "loss": 0.6820501089096069, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8733171127331711, | |
| "grad_norm": 0.5544079542160034, | |
| "learning_rate": 1.700248855770021e-05, | |
| "loss": 0.7887839078903198, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.8746147607461476, | |
| "grad_norm": 0.5328344702720642, | |
| "learning_rate": 1.6992273030055022e-05, | |
| "loss": 0.7038314938545227, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 0.5509505867958069, | |
| "learning_rate": 1.6982043204981264e-05, | |
| "loss": 0.7049298286437988, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.8772100567721006, | |
| "grad_norm": 0.5168129205703735, | |
| "learning_rate": 1.6971799103396332e-05, | |
| "loss": 0.6959193348884583, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.878507704785077, | |
| "grad_norm": 0.5376099944114685, | |
| "learning_rate": 1.696154074624683e-05, | |
| "loss": 0.7292076349258423, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.8798053527980535, | |
| "grad_norm": 0.5142057538032532, | |
| "learning_rate": 1.6951268154508497e-05, | |
| "loss": 0.7193281650543213, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.88110300081103, | |
| "grad_norm": 0.5402371287345886, | |
| "learning_rate": 1.6940981349186182e-05, | |
| "loss": 0.748397946357727, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.8824006488240065, | |
| "grad_norm": 0.5436865091323853, | |
| "learning_rate": 1.69306803513138e-05, | |
| "loss": 0.7238379716873169, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.883698296836983, | |
| "grad_norm": 0.5323321223258972, | |
| "learning_rate": 1.6920365181954284e-05, | |
| "loss": 0.7368711829185486, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.8849959448499595, | |
| "grad_norm": 0.5474384427070618, | |
| "learning_rate": 1.6910035862199545e-05, | |
| "loss": 0.7030202746391296, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.886293592862936, | |
| "grad_norm": 0.5428197979927063, | |
| "learning_rate": 1.6899692413170422e-05, | |
| "loss": 0.713437557220459, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.8875912408759125, | |
| "grad_norm": 0.5502634048461914, | |
| "learning_rate": 1.688933485601666e-05, | |
| "loss": 0.7090182304382324, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.5356465578079224, | |
| "learning_rate": 1.6878963211916833e-05, | |
| "loss": 0.7201128005981445, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.8901865369018653, | |
| "grad_norm": 0.5563944578170776, | |
| "learning_rate": 1.6868577502078336e-05, | |
| "loss": 0.7264722585678101, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8914841849148418, | |
| "grad_norm": 0.5522723197937012, | |
| "learning_rate": 1.6858177747737312e-05, | |
| "loss": 0.7600725889205933, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.8927818329278183, | |
| "grad_norm": 0.49715539813041687, | |
| "learning_rate": 1.684776397015863e-05, | |
| "loss": 0.6456987857818604, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8940794809407948, | |
| "grad_norm": 0.5162433981895447, | |
| "learning_rate": 1.6837336190635824e-05, | |
| "loss": 0.6648015379905701, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.8953771289537713, | |
| "grad_norm": 0.5113485455513, | |
| "learning_rate": 1.682689443049107e-05, | |
| "loss": 0.7002501487731934, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8953771289537713, | |
| "eval_loss": 0.6879991292953491, | |
| "eval_runtime": 72.6036, | |
| "eval_samples_per_second": 71.512, | |
| "eval_steps_per_second": 8.939, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8966747769667478, | |
| "grad_norm": 0.5129652619361877, | |
| "learning_rate": 1.6816438711075114e-05, | |
| "loss": 0.7118932008743286, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.8979724249797243, | |
| "grad_norm": 0.5204065442085266, | |
| "learning_rate": 1.680596905376727e-05, | |
| "loss": 0.7194908857345581, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8992700729927007, | |
| "grad_norm": 0.5264798402786255, | |
| "learning_rate": 1.6795485479975327e-05, | |
| "loss": 0.6868776082992554, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.9005677210056772, | |
| "grad_norm": 0.5244487524032593, | |
| "learning_rate": 1.6784988011135546e-05, | |
| "loss": 0.7106890678405762, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.9018653690186537, | |
| "grad_norm": 0.5397396683692932, | |
| "learning_rate": 1.6774476668712587e-05, | |
| "loss": 0.695647656917572, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.9031630170316302, | |
| "grad_norm": 0.5147722959518433, | |
| "learning_rate": 1.676395147419949e-05, | |
| "loss": 0.7283300161361694, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.9044606650446066, | |
| "grad_norm": 0.5326966047286987, | |
| "learning_rate": 1.6753412449117615e-05, | |
| "loss": 0.7349389791488647, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.9057583130575831, | |
| "grad_norm": 0.522964596748352, | |
| "learning_rate": 1.67428596150166e-05, | |
| "loss": 0.7657152414321899, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.9070559610705596, | |
| "grad_norm": 0.5306779742240906, | |
| "learning_rate": 1.6732292993474316e-05, | |
| "loss": 0.6991469264030457, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.9083536090835361, | |
| "grad_norm": 0.517011284828186, | |
| "learning_rate": 1.6721712606096833e-05, | |
| "loss": 0.6861897706985474, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9096512570965126, | |
| "grad_norm": 0.5209232568740845, | |
| "learning_rate": 1.6711118474518363e-05, | |
| "loss": 0.6535213589668274, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.910948905109489, | |
| "grad_norm": 0.538005530834198, | |
| "learning_rate": 1.6700510620401223e-05, | |
| "loss": 0.6827917695045471, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.9122465531224655, | |
| "grad_norm": 0.5532050132751465, | |
| "learning_rate": 1.6689889065435796e-05, | |
| "loss": 0.7328672409057617, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.913544201135442, | |
| "grad_norm": 0.5541777014732361, | |
| "learning_rate": 1.667925383134047e-05, | |
| "loss": 0.639081597328186, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.9148418491484185, | |
| "grad_norm": 0.5441383719444275, | |
| "learning_rate": 1.66686049398616e-05, | |
| "loss": 0.7073994874954224, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.916139497161395, | |
| "grad_norm": 0.5432547330856323, | |
| "learning_rate": 1.6657942412773484e-05, | |
| "loss": 0.7249147295951843, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.9174371451743715, | |
| "grad_norm": 0.5718936324119568, | |
| "learning_rate": 1.664726627187829e-05, | |
| "loss": 0.7475080490112305, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.918734793187348, | |
| "grad_norm": 0.5303789377212524, | |
| "learning_rate": 1.6636576539006015e-05, | |
| "loss": 0.7102556228637695, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.9200324412003245, | |
| "grad_norm": 0.5120844841003418, | |
| "learning_rate": 1.6625873236014464e-05, | |
| "loss": 0.7160992622375488, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.9213300892133008, | |
| "grad_norm": 0.5382957458496094, | |
| "learning_rate": 1.6615156384789185e-05, | |
| "loss": 0.6958597898483276, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.9226277372262773, | |
| "grad_norm": 0.5195145606994629, | |
| "learning_rate": 1.660442600724342e-05, | |
| "loss": 0.6958160400390625, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.9239253852392538, | |
| "grad_norm": 0.5473058223724365, | |
| "learning_rate": 1.659368212531808e-05, | |
| "loss": 0.7220757007598877, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.9252230332522303, | |
| "grad_norm": 0.5131781697273254, | |
| "learning_rate": 1.6582924760981683e-05, | |
| "loss": 0.7035195827484131, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.9265206812652068, | |
| "grad_norm": 0.5314381122589111, | |
| "learning_rate": 1.6572153936230316e-05, | |
| "loss": 0.6506175994873047, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.9278183292781833, | |
| "grad_norm": 0.565310001373291, | |
| "learning_rate": 1.6561369673087588e-05, | |
| "loss": 0.7714331746101379, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.9291159772911598, | |
| "grad_norm": 0.530504584312439, | |
| "learning_rate": 1.6550571993604587e-05, | |
| "loss": 0.7331136465072632, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.9304136253041363, | |
| "grad_norm": 0.5755041837692261, | |
| "learning_rate": 1.6539760919859838e-05, | |
| "loss": 0.7090123891830444, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.9317112733171128, | |
| "grad_norm": 0.5264776349067688, | |
| "learning_rate": 1.6528936473959253e-05, | |
| "loss": 0.7207454442977905, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.9330089213300892, | |
| "grad_norm": 0.5459887981414795, | |
| "learning_rate": 1.6518098678036073e-05, | |
| "loss": 0.7477676272392273, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.9343065693430657, | |
| "grad_norm": 0.5480107069015503, | |
| "learning_rate": 1.650724755425086e-05, | |
| "loss": 0.7585529685020447, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9356042173560422, | |
| "grad_norm": 0.5156884789466858, | |
| "learning_rate": 1.6496383124791406e-05, | |
| "loss": 0.684555172920227, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.9369018653690186, | |
| "grad_norm": 0.5162327289581299, | |
| "learning_rate": 1.6485505411872725e-05, | |
| "loss": 0.7163575887680054, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.9381995133819951, | |
| "grad_norm": 0.5424114465713501, | |
| "learning_rate": 1.6474614437736986e-05, | |
| "loss": 0.722049355506897, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.9394971613949716, | |
| "grad_norm": 0.546845555305481, | |
| "learning_rate": 1.6463710224653477e-05, | |
| "loss": 0.7012547850608826, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.9407948094079481, | |
| "grad_norm": 0.5183011889457703, | |
| "learning_rate": 1.6452792794918545e-05, | |
| "loss": 0.7152835130691528, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.9420924574209246, | |
| "grad_norm": 0.5085439682006836, | |
| "learning_rate": 1.644186217085558e-05, | |
| "loss": 0.7061685919761658, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.943390105433901, | |
| "grad_norm": 0.5237677097320557, | |
| "learning_rate": 1.6430918374814937e-05, | |
| "loss": 0.7506479024887085, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.9446877534468775, | |
| "grad_norm": 0.5498985052108765, | |
| "learning_rate": 1.641996142917391e-05, | |
| "loss": 0.7604420185089111, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.945985401459854, | |
| "grad_norm": 0.506365180015564, | |
| "learning_rate": 1.640899135633668e-05, | |
| "loss": 0.7282454967498779, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.9472830494728305, | |
| "grad_norm": 0.5272793769836426, | |
| "learning_rate": 1.6398008178734272e-05, | |
| "loss": 0.7712985277175903, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.948580697485807, | |
| "grad_norm": 0.49885818362236023, | |
| "learning_rate": 1.6387011918824493e-05, | |
| "loss": 0.6967482566833496, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.9498783454987835, | |
| "grad_norm": 0.5086526274681091, | |
| "learning_rate": 1.6376002599091925e-05, | |
| "loss": 0.7118892073631287, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.95117599351176, | |
| "grad_norm": 0.5380651354789734, | |
| "learning_rate": 1.6364980242047835e-05, | |
| "loss": 0.7118611335754395, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.9524736415247365, | |
| "grad_norm": 0.5358894467353821, | |
| "learning_rate": 1.635394487023015e-05, | |
| "loss": 0.73922199010849, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.9537712895377128, | |
| "grad_norm": 0.518375813961029, | |
| "learning_rate": 1.634289650620342e-05, | |
| "loss": 0.7491021156311035, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.9550689375506893, | |
| "grad_norm": 0.5029126405715942, | |
| "learning_rate": 1.633183517255875e-05, | |
| "loss": 0.6724518537521362, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9563665855636658, | |
| "grad_norm": 0.5309873819351196, | |
| "learning_rate": 1.632076089191376e-05, | |
| "loss": 0.7152642011642456, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.9576642335766423, | |
| "grad_norm": 0.5265018343925476, | |
| "learning_rate": 1.630967368691256e-05, | |
| "loss": 0.7223344445228577, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.9589618815896188, | |
| "grad_norm": 0.5360968112945557, | |
| "learning_rate": 1.6298573580225676e-05, | |
| "loss": 0.6773437261581421, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.9602595296025953, | |
| "grad_norm": 0.532696545124054, | |
| "learning_rate": 1.6287460594550017e-05, | |
| "loss": 0.6913273930549622, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9615571776155718, | |
| "grad_norm": 0.5159463286399841, | |
| "learning_rate": 1.6276334752608823e-05, | |
| "loss": 0.7023458480834961, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.9628548256285483, | |
| "grad_norm": 0.5166627764701843, | |
| "learning_rate": 1.6265196077151627e-05, | |
| "loss": 0.6580889821052551, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.9641524736415248, | |
| "grad_norm": 0.5432324409484863, | |
| "learning_rate": 1.62540445909542e-05, | |
| "loss": 0.7707301378250122, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.9654501216545012, | |
| "grad_norm": 0.5537624955177307, | |
| "learning_rate": 1.624288031681851e-05, | |
| "loss": 0.718231737613678, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9667477696674777, | |
| "grad_norm": 0.5601441860198975, | |
| "learning_rate": 1.623170327757267e-05, | |
| "loss": 0.7587568759918213, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.9680454176804542, | |
| "grad_norm": 0.5228809118270874, | |
| "learning_rate": 1.62205134960709e-05, | |
| "loss": 0.7063294649124146, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9693430656934306, | |
| "grad_norm": 0.5264230370521545, | |
| "learning_rate": 1.620931099519347e-05, | |
| "loss": 0.7381964921951294, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.9706407137064071, | |
| "grad_norm": 0.5306467413902283, | |
| "learning_rate": 1.619809579784665e-05, | |
| "loss": 0.6895403861999512, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9719383617193836, | |
| "grad_norm": 0.5162505507469177, | |
| "learning_rate": 1.6186867926962695e-05, | |
| "loss": 0.7042033672332764, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.9732360097323601, | |
| "grad_norm": 0.51023268699646, | |
| "learning_rate": 1.6175627405499746e-05, | |
| "loss": 0.7028312683105469, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9745336577453366, | |
| "grad_norm": 0.5226272344589233, | |
| "learning_rate": 1.6164374256441837e-05, | |
| "loss": 0.7110305428504944, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.975831305758313, | |
| "grad_norm": 0.5189753174781799, | |
| "learning_rate": 1.6153108502798796e-05, | |
| "loss": 0.7227635979652405, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9771289537712895, | |
| "grad_norm": 0.5253064036369324, | |
| "learning_rate": 1.614183016760625e-05, | |
| "loss": 0.708706259727478, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.978426601784266, | |
| "grad_norm": 0.5069226622581482, | |
| "learning_rate": 1.613053927392553e-05, | |
| "loss": 0.7607108354568481, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9797242497972425, | |
| "grad_norm": 0.5430122017860413, | |
| "learning_rate": 1.6119235844843664e-05, | |
| "loss": 0.6882092356681824, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.981021897810219, | |
| "grad_norm": 0.5484969615936279, | |
| "learning_rate": 1.6107919903473294e-05, | |
| "loss": 0.6984055638313293, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9823195458231955, | |
| "grad_norm": 0.5450364351272583, | |
| "learning_rate": 1.6096591472952664e-05, | |
| "loss": 0.7414028644561768, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.983617193836172, | |
| "grad_norm": 0.5095598101615906, | |
| "learning_rate": 1.6085250576445548e-05, | |
| "loss": 0.6796683073043823, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.9849148418491485, | |
| "grad_norm": 0.5161803364753723, | |
| "learning_rate": 1.6073897237141203e-05, | |
| "loss": 0.6673390865325928, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.986212489862125, | |
| "grad_norm": 0.5004435777664185, | |
| "learning_rate": 1.6062531478254333e-05, | |
| "loss": 0.6315610408782959, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9875101378751013, | |
| "grad_norm": 0.5166559219360352, | |
| "learning_rate": 1.605115332302505e-05, | |
| "loss": 0.6672409176826477, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.9888077858880778, | |
| "grad_norm": 0.5332128405570984, | |
| "learning_rate": 1.603976279471879e-05, | |
| "loss": 0.7169513702392578, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9901054339010543, | |
| "grad_norm": 0.5556347370147705, | |
| "learning_rate": 1.6028359916626308e-05, | |
| "loss": 0.708602786064148, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.9914030819140308, | |
| "grad_norm": 0.5154053568840027, | |
| "learning_rate": 1.601694471206359e-05, | |
| "loss": 0.6270056366920471, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9927007299270073, | |
| "grad_norm": 0.5185645222663879, | |
| "learning_rate": 1.600551720437186e-05, | |
| "loss": 0.6873992085456848, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.9939983779399838, | |
| "grad_norm": 0.546991229057312, | |
| "learning_rate": 1.599407741691746e-05, | |
| "loss": 0.7366882562637329, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9952960259529603, | |
| "grad_norm": 0.5219473838806152, | |
| "learning_rate": 1.5982625373091877e-05, | |
| "loss": 0.6808854937553406, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.9965936739659368, | |
| "grad_norm": 0.5348212122917175, | |
| "learning_rate": 1.5971161096311628e-05, | |
| "loss": 0.7217116355895996, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9978913219789132, | |
| "grad_norm": 0.5152093172073364, | |
| "learning_rate": 1.5959684610018267e-05, | |
| "loss": 0.6545735597610474, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.9991889699918897, | |
| "grad_norm": 0.5182209610939026, | |
| "learning_rate": 1.5948195937678297e-05, | |
| "loss": 0.6775786280632019, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.634954571723938, | |
| "learning_rate": 1.5936695102783148e-05, | |
| "loss": 0.6640980839729309, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.0012976480129765, | |
| "grad_norm": 0.7553068399429321, | |
| "learning_rate": 1.5925182128849116e-05, | |
| "loss": 0.6133830547332764, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.002595296025953, | |
| "grad_norm": 0.6613984704017639, | |
| "learning_rate": 1.591365703941732e-05, | |
| "loss": 0.5815013647079468, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.0038929440389295, | |
| "grad_norm": 0.592282235622406, | |
| "learning_rate": 1.5902119858053652e-05, | |
| "loss": 0.5898460149765015, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.005190592051906, | |
| "grad_norm": 0.5373958945274353, | |
| "learning_rate": 1.589057060834872e-05, | |
| "loss": 0.6019303798675537, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.0064882400648825, | |
| "grad_norm": 0.6260755062103271, | |
| "learning_rate": 1.5879009313917826e-05, | |
| "loss": 0.5970971584320068, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.007785888077859, | |
| "grad_norm": 0.7529841661453247, | |
| "learning_rate": 1.5867435998400885e-05, | |
| "loss": 0.6816403865814209, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.0090835360908355, | |
| "grad_norm": 0.7224608659744263, | |
| "learning_rate": 1.5855850685462404e-05, | |
| "loss": 0.6263958215713501, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.010381184103812, | |
| "grad_norm": 0.6676880121231079, | |
| "learning_rate": 1.584425339879141e-05, | |
| "loss": 0.6304363012313843, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.0116788321167882, | |
| "grad_norm": 0.5799426436424255, | |
| "learning_rate": 1.5832644162101417e-05, | |
| "loss": 0.59343421459198, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.0129764801297647, | |
| "grad_norm": 0.570095956325531, | |
| "learning_rate": 1.5821022999130385e-05, | |
| "loss": 0.5410763025283813, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.0142741281427412, | |
| "grad_norm": 0.5948435068130493, | |
| "learning_rate": 1.580938993364064e-05, | |
| "loss": 0.5649259686470032, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.0155717761557177, | |
| "grad_norm": 0.6467446684837341, | |
| "learning_rate": 1.579774498941886e-05, | |
| "loss": 0.5860875844955444, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.0168694241686942, | |
| "grad_norm": 0.5886529088020325, | |
| "learning_rate": 1.578608819027602e-05, | |
| "loss": 0.5772626996040344, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.0181670721816707, | |
| "grad_norm": 0.5842233896255493, | |
| "learning_rate": 1.5774419560047303e-05, | |
| "loss": 0.6277778148651123, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.0194647201946472, | |
| "grad_norm": 0.590059220790863, | |
| "learning_rate": 1.5762739122592123e-05, | |
| "loss": 0.6396061182022095, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.0207623682076237, | |
| "grad_norm": 0.5897361636161804, | |
| "learning_rate": 1.5751046901794008e-05, | |
| "loss": 0.5980340242385864, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.0220600162206002, | |
| "grad_norm": 0.5984208583831787, | |
| "learning_rate": 1.5739342921560593e-05, | |
| "loss": 0.602581262588501, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.0233576642335767, | |
| "grad_norm": 0.5837097764015198, | |
| "learning_rate": 1.5727627205823554e-05, | |
| "loss": 0.5742583274841309, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.0246553122465532, | |
| "grad_norm": 0.5804028511047363, | |
| "learning_rate": 1.571589977853857e-05, | |
| "loss": 0.6103036999702454, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.0259529602595296, | |
| "grad_norm": 0.5784346461296082, | |
| "learning_rate": 1.5704160663685254e-05, | |
| "loss": 0.5436456203460693, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.0272506082725061, | |
| "grad_norm": 0.576518714427948, | |
| "learning_rate": 1.5692409885267127e-05, | |
| "loss": 0.6918940544128418, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.0285482562854826, | |
| "grad_norm": 0.5824302434921265, | |
| "learning_rate": 1.568064746731156e-05, | |
| "loss": 0.6090575456619263, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.0298459042984591, | |
| "grad_norm": 0.5279770493507385, | |
| "learning_rate": 1.5668873433869718e-05, | |
| "loss": 0.5268336534500122, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.0311435523114356, | |
| "grad_norm": 0.5494199395179749, | |
| "learning_rate": 1.5657087809016517e-05, | |
| "loss": 0.5766473412513733, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.0324412003244121, | |
| "grad_norm": 0.5452569723129272, | |
| "learning_rate": 1.564529061685058e-05, | |
| "loss": 0.5949534177780151, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.0337388483373884, | |
| "grad_norm": 0.5392066240310669, | |
| "learning_rate": 1.5633481881494178e-05, | |
| "loss": 0.5571380853652954, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.0350364963503649, | |
| "grad_norm": 0.5568217635154724, | |
| "learning_rate": 1.562166162709319e-05, | |
| "loss": 0.5642133951187134, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.0363341443633414, | |
| "grad_norm": 0.5702704191207886, | |
| "learning_rate": 1.560982987781704e-05, | |
| "loss": 0.6047669649124146, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.0376317923763179, | |
| "grad_norm": 0.532315731048584, | |
| "learning_rate": 1.5597986657858656e-05, | |
| "loss": 0.5958635807037354, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0389294403892944, | |
| "grad_norm": 0.5331001877784729, | |
| "learning_rate": 1.5586131991434434e-05, | |
| "loss": 0.5987897515296936, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.0402270884022708, | |
| "grad_norm": 0.5481564402580261, | |
| "learning_rate": 1.5574265902784163e-05, | |
| "loss": 0.5622409582138062, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.0415247364152473, | |
| "grad_norm": 0.5720167756080627, | |
| "learning_rate": 1.556238841617099e-05, | |
| "loss": 0.6064007878303528, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.0428223844282238, | |
| "grad_norm": 0.5809172987937927, | |
| "learning_rate": 1.555049955588137e-05, | |
| "loss": 0.6170299053192139, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.0441200324412003, | |
| "grad_norm": 0.5783301591873169, | |
| "learning_rate": 1.5538599346225013e-05, | |
| "loss": 0.568396270275116, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.0454176804541768, | |
| "grad_norm": 0.5668922662734985, | |
| "learning_rate": 1.552668781153484e-05, | |
| "loss": 0.576393723487854, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.0467153284671533, | |
| "grad_norm": 0.5634539723396301, | |
| "learning_rate": 1.5514764976166916e-05, | |
| "loss": 0.6574882864952087, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.0480129764801298, | |
| "grad_norm": 0.5463752150535583, | |
| "learning_rate": 1.5502830864500426e-05, | |
| "loss": 0.5930934548377991, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.0493106244931063, | |
| "grad_norm": 0.5872495174407959, | |
| "learning_rate": 1.5490885500937606e-05, | |
| "loss": 0.609790563583374, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.0506082725060828, | |
| "grad_norm": 0.5574213266372681, | |
| "learning_rate": 1.5478928909903705e-05, | |
| "loss": 0.60848468542099, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.0519059205190593, | |
| "grad_norm": 0.5493984818458557, | |
| "learning_rate": 1.5466961115846927e-05, | |
| "loss": 0.5494011640548706, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.0532035685320358, | |
| "grad_norm": 0.5724595785140991, | |
| "learning_rate": 1.545498214323837e-05, | |
| "loss": 0.5948253273963928, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.0545012165450123, | |
| "grad_norm": 0.5360091924667358, | |
| "learning_rate": 1.544299201657202e-05, | |
| "loss": 0.6195284128189087, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.0557988645579885, | |
| "grad_norm": 0.5609839558601379, | |
| "learning_rate": 1.543099076036463e-05, | |
| "loss": 0.5945447087287903, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.057096512570965, | |
| "grad_norm": 0.5413586497306824, | |
| "learning_rate": 1.5418978399155748e-05, | |
| "loss": 0.55891352891922, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.0583941605839415, | |
| "grad_norm": 0.5763382315635681, | |
| "learning_rate": 1.54069549575076e-05, | |
| "loss": 0.5900748372077942, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.059691808596918, | |
| "grad_norm": 0.5625810623168945, | |
| "learning_rate": 1.539492046000509e-05, | |
| "loss": 0.5834665298461914, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.0609894566098945, | |
| "grad_norm": 0.5442895889282227, | |
| "learning_rate": 1.5382874931255717e-05, | |
| "loss": 0.6234191656112671, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.062287104622871, | |
| "grad_norm": 0.5448631048202515, | |
| "learning_rate": 1.5370818395889536e-05, | |
| "loss": 0.5617302060127258, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.0635847526358475, | |
| "grad_norm": 0.5880674719810486, | |
| "learning_rate": 1.5358750878559113e-05, | |
| "loss": 0.6024942994117737, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.064882400648824, | |
| "grad_norm": 0.5762202143669128, | |
| "learning_rate": 1.5346672403939465e-05, | |
| "loss": 0.625447154045105, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.0661800486618005, | |
| "grad_norm": 0.5726525187492371, | |
| "learning_rate": 1.5334582996728017e-05, | |
| "loss": 0.6527541875839233, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.067477696674777, | |
| "grad_norm": 0.5863476991653442, | |
| "learning_rate": 1.532248268164455e-05, | |
| "loss": 0.6537057161331177, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.0687753446877535, | |
| "grad_norm": 0.5855088829994202, | |
| "learning_rate": 1.5310371483431138e-05, | |
| "loss": 0.5910706520080566, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.07007299270073, | |
| "grad_norm": 0.5428813695907593, | |
| "learning_rate": 1.529824942685212e-05, | |
| "loss": 0.6206585168838501, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.0713706407137065, | |
| "grad_norm": 0.5427327156066895, | |
| "learning_rate": 1.528611653669403e-05, | |
| "loss": 0.6064955592155457, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.072668288726683, | |
| "grad_norm": 0.5533806085586548, | |
| "learning_rate": 1.5273972837765566e-05, | |
| "loss": 0.6161221861839294, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.0739659367396595, | |
| "grad_norm": 0.5330477356910706, | |
| "learning_rate": 1.526181835489751e-05, | |
| "loss": 0.584095299243927, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.075263584752636, | |
| "grad_norm": 0.5572231411933899, | |
| "learning_rate": 1.5249653112942708e-05, | |
| "loss": 0.6146395206451416, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.0765612327656124, | |
| "grad_norm": 0.5302649140357971, | |
| "learning_rate": 1.5237477136776e-05, | |
| "loss": 0.5835666060447693, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0778588807785887, | |
| "grad_norm": 0.524252712726593, | |
| "learning_rate": 1.5225290451294173e-05, | |
| "loss": 0.5483739376068115, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.0791565287915652, | |
| "grad_norm": 0.5535216331481934, | |
| "learning_rate": 1.521309308141592e-05, | |
| "loss": 0.5715370774269104, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.0804541768045417, | |
| "grad_norm": 0.5739737749099731, | |
| "learning_rate": 1.5200885052081767e-05, | |
| "loss": 0.6168693900108337, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.0817518248175182, | |
| "grad_norm": 0.5620468258857727, | |
| "learning_rate": 1.518866638825405e-05, | |
| "loss": 0.6358708143234253, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.0830494728304947, | |
| "grad_norm": 0.5504558086395264, | |
| "learning_rate": 1.517643711491684e-05, | |
| "loss": 0.5625787973403931, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.0843471208434712, | |
| "grad_norm": 0.527152955532074, | |
| "learning_rate": 1.516419725707591e-05, | |
| "loss": 0.5917230248451233, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.0856447688564477, | |
| "grad_norm": 0.5097678899765015, | |
| "learning_rate": 1.5151946839758673e-05, | |
| "loss": 0.5631688237190247, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.0869424168694242, | |
| "grad_norm": 0.5500524044036865, | |
| "learning_rate": 1.5139685888014123e-05, | |
| "loss": 0.6300808787345886, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.0882400648824007, | |
| "grad_norm": 0.580634355545044, | |
| "learning_rate": 1.512741442691281e-05, | |
| "loss": 0.6707481145858765, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.0895377128953772, | |
| "grad_norm": 0.5668573379516602, | |
| "learning_rate": 1.5115132481546763e-05, | |
| "loss": 0.5974687337875366, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.0908353609083536, | |
| "grad_norm": 0.5720273852348328, | |
| "learning_rate": 1.5102840077029452e-05, | |
| "loss": 0.5461701154708862, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.0921330089213301, | |
| "grad_norm": 0.5787645578384399, | |
| "learning_rate": 1.509053723849574e-05, | |
| "loss": 0.6476290225982666, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.0934306569343066, | |
| "grad_norm": 0.5475322604179382, | |
| "learning_rate": 1.5078223991101805e-05, | |
| "loss": 0.5730643272399902, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.0947283049472831, | |
| "grad_norm": 0.5544430017471313, | |
| "learning_rate": 1.5065900360025128e-05, | |
| "loss": 0.6112351417541504, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.0960259529602596, | |
| "grad_norm": 0.6194364428520203, | |
| "learning_rate": 1.5053566370464416e-05, | |
| "loss": 0.612515926361084, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.0973236009732361, | |
| "grad_norm": 0.5542813539505005, | |
| "learning_rate": 1.5041222047639558e-05, | |
| "loss": 0.60612952709198, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.0986212489862126, | |
| "grad_norm": 0.5259748697280884, | |
| "learning_rate": 1.5028867416791566e-05, | |
| "loss": 0.5666128396987915, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.0999188969991889, | |
| "grad_norm": 0.5615611672401428, | |
| "learning_rate": 1.5016502503182533e-05, | |
| "loss": 0.5991164445877075, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.1012165450121654, | |
| "grad_norm": 0.5396665334701538, | |
| "learning_rate": 1.5004127332095579e-05, | |
| "loss": 0.608413815498352, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.1025141930251419, | |
| "grad_norm": 0.5625605583190918, | |
| "learning_rate": 1.49917419288348e-05, | |
| "loss": 0.6390218138694763, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.1038118410381184, | |
| "grad_norm": 0.5652357935905457, | |
| "learning_rate": 1.4979346318725203e-05, | |
| "loss": 0.613496720790863, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.1051094890510949, | |
| "grad_norm": 0.5494624376296997, | |
| "learning_rate": 1.4966940527112679e-05, | |
| "loss": 0.6234304308891296, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.1064071370640713, | |
| "grad_norm": 0.546302855014801, | |
| "learning_rate": 1.4954524579363932e-05, | |
| "loss": 0.6565023064613342, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.1077047850770478, | |
| "grad_norm": 0.5649261474609375, | |
| "learning_rate": 1.4942098500866428e-05, | |
| "loss": 0.6422203183174133, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.1090024330900243, | |
| "grad_norm": 0.5499486923217773, | |
| "learning_rate": 1.4929662317028359e-05, | |
| "loss": 0.6043179035186768, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.1103000811030008, | |
| "grad_norm": 0.5544485449790955, | |
| "learning_rate": 1.491721605327857e-05, | |
| "loss": 0.5800666213035583, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.1115977291159773, | |
| "grad_norm": 0.5804775953292847, | |
| "learning_rate": 1.490475973506652e-05, | |
| "loss": 0.6427537798881531, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.1128953771289538, | |
| "grad_norm": 0.5342238545417786, | |
| "learning_rate": 1.4892293387862221e-05, | |
| "loss": 0.6311315298080444, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.1141930251419303, | |
| "grad_norm": 0.5803128480911255, | |
| "learning_rate": 1.487981703715621e-05, | |
| "loss": 0.6198186874389648, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.1154906731549068, | |
| "grad_norm": 0.5532170534133911, | |
| "learning_rate": 1.4867330708459463e-05, | |
| "loss": 0.6145609617233276, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.1167883211678833, | |
| "grad_norm": 0.5493961572647095, | |
| "learning_rate": 1.4854834427303353e-05, | |
| "loss": 0.6166091561317444, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.1180859691808598, | |
| "grad_norm": 0.5559639930725098, | |
| "learning_rate": 1.4842328219239618e-05, | |
| "loss": 0.6064823865890503, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.119383617193836, | |
| "grad_norm": 0.5540943145751953, | |
| "learning_rate": 1.4829812109840291e-05, | |
| "loss": 0.5765544176101685, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.1206812652068125, | |
| "grad_norm": 0.5384024381637573, | |
| "learning_rate": 1.4817286124697647e-05, | |
| "loss": 0.565604567527771, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.121978913219789, | |
| "grad_norm": 0.5547834634780884, | |
| "learning_rate": 1.480475028942415e-05, | |
| "loss": 0.6463969349861145, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.1232765612327655, | |
| "grad_norm": 0.5574260354042053, | |
| "learning_rate": 1.4792204629652414e-05, | |
| "loss": 0.5858181118965149, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.124574209245742, | |
| "grad_norm": 0.5450447201728821, | |
| "learning_rate": 1.4779649171035138e-05, | |
| "loss": 0.6112916469573975, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.1258718572587185, | |
| "grad_norm": 0.5452038645744324, | |
| "learning_rate": 1.4767083939245055e-05, | |
| "loss": 0.6333041787147522, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.127169505271695, | |
| "grad_norm": 0.5453193187713623, | |
| "learning_rate": 1.475450895997489e-05, | |
| "loss": 0.6154720783233643, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.1284671532846715, | |
| "grad_norm": 0.5503911375999451, | |
| "learning_rate": 1.4741924258937283e-05, | |
| "loss": 0.580187201499939, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.129764801297648, | |
| "grad_norm": 0.564156174659729, | |
| "learning_rate": 1.472932986186477e-05, | |
| "loss": 0.6397178173065186, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.1310624493106245, | |
| "grad_norm": 0.5705751180648804, | |
| "learning_rate": 1.47167257945097e-05, | |
| "loss": 0.6369278430938721, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.132360097323601, | |
| "grad_norm": 0.562324583530426, | |
| "learning_rate": 1.4704112082644207e-05, | |
| "loss": 0.5986394882202148, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.1336577453365775, | |
| "grad_norm": 0.5652042031288147, | |
| "learning_rate": 1.4691488752060132e-05, | |
| "loss": 0.6185961365699768, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.134955393349554, | |
| "grad_norm": 0.5481469035148621, | |
| "learning_rate": 1.4678855828568996e-05, | |
| "loss": 0.5570172071456909, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.1362530413625305, | |
| "grad_norm": 0.5480834245681763, | |
| "learning_rate": 1.4666213338001929e-05, | |
| "loss": 0.5788794755935669, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.137550689375507, | |
| "grad_norm": 0.5426838994026184, | |
| "learning_rate": 1.4653561306209625e-05, | |
| "loss": 0.5975257158279419, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.1388483373884835, | |
| "grad_norm": 0.5632731914520264, | |
| "learning_rate": 1.4640899759062285e-05, | |
| "loss": 0.6319808959960938, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.14014598540146, | |
| "grad_norm": 0.5687447786331177, | |
| "learning_rate": 1.462822872244957e-05, | |
| "loss": 0.6043187379837036, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.1414436334144362, | |
| "grad_norm": 0.5472837686538696, | |
| "learning_rate": 1.461554822228054e-05, | |
| "loss": 0.607802152633667, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.142741281427413, | |
| "grad_norm": 0.5329515933990479, | |
| "learning_rate": 1.460285828448361e-05, | |
| "loss": 0.5557148456573486, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.1440389294403892, | |
| "grad_norm": 0.5272259712219238, | |
| "learning_rate": 1.4590158935006494e-05, | |
| "loss": 0.5320879817008972, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.1453365774533657, | |
| "grad_norm": 0.5834517478942871, | |
| "learning_rate": 1.4577450199816142e-05, | |
| "loss": 0.6263319253921509, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.1466342254663422, | |
| "grad_norm": 0.5725152492523193, | |
| "learning_rate": 1.4564732104898702e-05, | |
| "loss": 0.659183919429779, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.1479318734793187, | |
| "grad_norm": 0.5416671633720398, | |
| "learning_rate": 1.4552004676259462e-05, | |
| "loss": 0.5948503613471985, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.1492295214922952, | |
| "grad_norm": 0.5543138384819031, | |
| "learning_rate": 1.453926793992279e-05, | |
| "loss": 0.6404953002929688, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.1505271695052717, | |
| "grad_norm": 0.5595470070838928, | |
| "learning_rate": 1.4526521921932091e-05, | |
| "loss": 0.6393734812736511, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.1518248175182482, | |
| "grad_norm": 0.5882608294487, | |
| "learning_rate": 1.4513766648349742e-05, | |
| "loss": 0.5654003024101257, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.1531224655312247, | |
| "grad_norm": 0.5529691576957703, | |
| "learning_rate": 1.4501002145257048e-05, | |
| "loss": 0.6137228012084961, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.1544201135442012, | |
| "grad_norm": 0.5548762083053589, | |
| "learning_rate": 1.4488228438754191e-05, | |
| "loss": 0.603983998298645, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.1557177615571776, | |
| "grad_norm": 0.5486696362495422, | |
| "learning_rate": 1.4475445554960166e-05, | |
| "loss": 0.6514973640441895, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.1570154095701541, | |
| "grad_norm": 0.5455385446548462, | |
| "learning_rate": 1.4462653520012736e-05, | |
| "loss": 0.6550310850143433, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.1583130575831306, | |
| "grad_norm": 0.5628224015235901, | |
| "learning_rate": 1.4449852360068372e-05, | |
| "loss": 0.6537249088287354, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.1596107055961071, | |
| "grad_norm": 0.5596909523010254, | |
| "learning_rate": 1.4437042101302212e-05, | |
| "loss": 0.6253930926322937, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.1609083536090836, | |
| "grad_norm": 0.5298051238059998, | |
| "learning_rate": 1.4424222769907985e-05, | |
| "loss": 0.57865309715271, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.1622060016220601, | |
| "grad_norm": 0.5473706722259521, | |
| "learning_rate": 1.4411394392097985e-05, | |
| "loss": 0.5876542329788208, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.1635036496350364, | |
| "grad_norm": 0.5646262168884277, | |
| "learning_rate": 1.4398556994102996e-05, | |
| "loss": 0.6242583990097046, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.164801297648013, | |
| "grad_norm": 0.5632451176643372, | |
| "learning_rate": 1.4385710602172245e-05, | |
| "loss": 0.6315684914588928, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.1660989456609894, | |
| "grad_norm": 0.5819709300994873, | |
| "learning_rate": 1.4372855242573356e-05, | |
| "loss": 0.5947535037994385, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.1673965936739659, | |
| "grad_norm": 0.5634546875953674, | |
| "learning_rate": 1.4359990941592283e-05, | |
| "loss": 0.6281697750091553, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1686942416869424, | |
| "grad_norm": 0.5534945130348206, | |
| "learning_rate": 1.4347117725533269e-05, | |
| "loss": 0.567562460899353, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.1699918896999189, | |
| "grad_norm": 0.5352903604507446, | |
| "learning_rate": 1.4334235620718774e-05, | |
| "loss": 0.5504214763641357, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.1712895377128953, | |
| "grad_norm": 0.5894420146942139, | |
| "learning_rate": 1.4321344653489453e-05, | |
| "loss": 0.5871877074241638, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.1725871857258718, | |
| "grad_norm": 0.5826941728591919, | |
| "learning_rate": 1.4308444850204066e-05, | |
| "loss": 0.5854516625404358, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.1738848337388483, | |
| "grad_norm": 0.5583464503288269, | |
| "learning_rate": 1.4295536237239445e-05, | |
| "loss": 0.6143467426300049, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.1751824817518248, | |
| "grad_norm": 0.5566253662109375, | |
| "learning_rate": 1.4282618840990438e-05, | |
| "loss": 0.6143018007278442, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.1764801297648013, | |
| "grad_norm": 0.5643221735954285, | |
| "learning_rate": 1.4269692687869849e-05, | |
| "loss": 0.6445101499557495, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.1777777777777778, | |
| "grad_norm": 0.583202600479126, | |
| "learning_rate": 1.425675780430839e-05, | |
| "loss": 0.6551916599273682, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.1790754257907543, | |
| "grad_norm": 0.5802360773086548, | |
| "learning_rate": 1.4243814216754626e-05, | |
| "loss": 0.6176046133041382, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.1803730738037308, | |
| "grad_norm": 0.5651218295097351, | |
| "learning_rate": 1.4230861951674914e-05, | |
| "loss": 0.6476747393608093, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.1816707218167073, | |
| "grad_norm": 0.5351070761680603, | |
| "learning_rate": 1.421790103555336e-05, | |
| "loss": 0.5974748134613037, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.1829683698296838, | |
| "grad_norm": 0.5506876111030579, | |
| "learning_rate": 1.4204931494891759e-05, | |
| "loss": 0.5977579355239868, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.1842660178426603, | |
| "grad_norm": 0.5496414303779602, | |
| "learning_rate": 1.4191953356209535e-05, | |
| "loss": 0.5993613004684448, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.1855636658556366, | |
| "grad_norm": 0.5448877215385437, | |
| "learning_rate": 1.4178966646043702e-05, | |
| "loss": 0.5849076509475708, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.186861313868613, | |
| "grad_norm": 0.5505439043045044, | |
| "learning_rate": 1.4165971390948787e-05, | |
| "loss": 0.6557425856590271, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.1881589618815895, | |
| "grad_norm": 0.5327088236808777, | |
| "learning_rate": 1.4152967617496805e-05, | |
| "loss": 0.5915898084640503, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.189456609894566, | |
| "grad_norm": 0.5534889698028564, | |
| "learning_rate": 1.4139955352277176e-05, | |
| "loss": 0.574662983417511, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.1907542579075425, | |
| "grad_norm": 0.5179355144500732, | |
| "learning_rate": 1.4126934621896692e-05, | |
| "loss": 0.5562629699707031, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.192051905920519, | |
| "grad_norm": 0.5698444247245789, | |
| "learning_rate": 1.4113905452979455e-05, | |
| "loss": 0.6139298677444458, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.1933495539334955, | |
| "grad_norm": 0.5280522108078003, | |
| "learning_rate": 1.410086787216681e-05, | |
| "loss": 0.5793087482452393, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1933495539334955, | |
| "eval_loss": 0.6891781091690063, | |
| "eval_runtime": 72.4953, | |
| "eval_samples_per_second": 71.618, | |
| "eval_steps_per_second": 8.952, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.194647201946472, | |
| "grad_norm": 0.518786609172821, | |
| "learning_rate": 1.4087821906117314e-05, | |
| "loss": 0.5602763891220093, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.1959448499594485, | |
| "grad_norm": 0.5518815517425537, | |
| "learning_rate": 1.4074767581506666e-05, | |
| "loss": 0.6225783824920654, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.197242497972425, | |
| "grad_norm": 0.5233501195907593, | |
| "learning_rate": 1.4061704925027653e-05, | |
| "loss": 0.5846587419509888, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.1985401459854015, | |
| "grad_norm": 0.5470210313796997, | |
| "learning_rate": 1.4048633963390105e-05, | |
| "loss": 0.5750600099563599, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.199837793998378, | |
| "grad_norm": 0.5647477507591248, | |
| "learning_rate": 1.4035554723320828e-05, | |
| "loss": 0.5977157354354858, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.2011354420113545, | |
| "grad_norm": 0.5179945230484009, | |
| "learning_rate": 1.4022467231563554e-05, | |
| "loss": 0.5806452035903931, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.202433090024331, | |
| "grad_norm": 0.5535194873809814, | |
| "learning_rate": 1.4009371514878898e-05, | |
| "loss": 0.6628227233886719, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.2037307380373075, | |
| "grad_norm": 0.6273780465126038, | |
| "learning_rate": 1.399626760004428e-05, | |
| "loss": 0.6142767667770386, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.205028386050284, | |
| "grad_norm": 0.5373409390449524, | |
| "learning_rate": 1.3983155513853897e-05, | |
| "loss": 0.6562739610671997, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.2063260340632604, | |
| "grad_norm": 0.5411200523376465, | |
| "learning_rate": 1.3970035283118639e-05, | |
| "loss": 0.5903608202934265, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.2076236820762367, | |
| "grad_norm": 0.5595235824584961, | |
| "learning_rate": 1.3956906934666056e-05, | |
| "loss": 0.6051539182662964, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.2089213300892132, | |
| "grad_norm": 0.5300971865653992, | |
| "learning_rate": 1.3943770495340307e-05, | |
| "loss": 0.643832802772522, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.2102189781021897, | |
| "grad_norm": 0.5413315892219543, | |
| "learning_rate": 1.3930625992002076e-05, | |
| "loss": 0.5942864418029785, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.2115166261151662, | |
| "grad_norm": 0.558797299861908, | |
| "learning_rate": 1.391747345152855e-05, | |
| "loss": 0.619717001914978, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.2128142741281427, | |
| "grad_norm": 0.5264928936958313, | |
| "learning_rate": 1.3904312900813345e-05, | |
| "loss": 0.5522656440734863, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.2141119221411192, | |
| "grad_norm": 0.5257030725479126, | |
| "learning_rate": 1.3891144366766457e-05, | |
| "loss": 0.5786164999008179, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.2154095701540957, | |
| "grad_norm": 0.577509343624115, | |
| "learning_rate": 1.3877967876314205e-05, | |
| "loss": 0.6315740346908569, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.2167072181670722, | |
| "grad_norm": 0.5317774415016174, | |
| "learning_rate": 1.3864783456399174e-05, | |
| "loss": 0.5896605253219604, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.2180048661800487, | |
| "grad_norm": 0.5598568320274353, | |
| "learning_rate": 1.3851591133980167e-05, | |
| "loss": 0.6161408424377441, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.2193025141930252, | |
| "grad_norm": 0.5387381911277771, | |
| "learning_rate": 1.3838390936032146e-05, | |
| "loss": 0.5705558061599731, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.2206001622060016, | |
| "grad_norm": 0.5279619693756104, | |
| "learning_rate": 1.3825182889546173e-05, | |
| "loss": 0.5650646686553955, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.2218978102189781, | |
| "grad_norm": 0.5602632164955139, | |
| "learning_rate": 1.3811967021529362e-05, | |
| "loss": 0.6143766045570374, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.2231954582319546, | |
| "grad_norm": 0.5425279140472412, | |
| "learning_rate": 1.3798743359004816e-05, | |
| "loss": 0.602745771408081, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.2244931062449311, | |
| "grad_norm": 0.5385331511497498, | |
| "learning_rate": 1.378551192901158e-05, | |
| "loss": 0.5555763244628906, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.2257907542579076, | |
| "grad_norm": 0.5338374972343445, | |
| "learning_rate": 1.3772272758604576e-05, | |
| "loss": 0.5934339165687561, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.2270884022708841, | |
| "grad_norm": 0.5479584336280823, | |
| "learning_rate": 1.375902587485456e-05, | |
| "loss": 0.5891726016998291, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.2283860502838606, | |
| "grad_norm": 0.5357087254524231, | |
| "learning_rate": 1.3745771304848056e-05, | |
| "loss": 0.5626200437545776, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.2296836982968369, | |
| "grad_norm": 0.5543829202651978, | |
| "learning_rate": 1.3732509075687302e-05, | |
| "loss": 0.5829602479934692, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.2309813463098134, | |
| "grad_norm": 0.5650047659873962, | |
| "learning_rate": 1.3719239214490203e-05, | |
| "loss": 0.6154081225395203, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.2322789943227899, | |
| "grad_norm": 0.5745924711227417, | |
| "learning_rate": 1.3705961748390264e-05, | |
| "loss": 0.5824979543685913, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.2335766423357664, | |
| "grad_norm": 0.5524203777313232, | |
| "learning_rate": 1.3692676704536547e-05, | |
| "loss": 0.6566962599754333, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.2348742903487429, | |
| "grad_norm": 0.5592309832572937, | |
| "learning_rate": 1.3679384110093601e-05, | |
| "loss": 0.5955104231834412, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.2361719383617193, | |
| "grad_norm": 0.5388526916503906, | |
| "learning_rate": 1.3666083992241414e-05, | |
| "loss": 0.6259311437606812, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.2374695863746958, | |
| "grad_norm": 0.5431481599807739, | |
| "learning_rate": 1.3652776378175366e-05, | |
| "loss": 0.6409016847610474, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.2387672343876723, | |
| "grad_norm": 0.5381134748458862, | |
| "learning_rate": 1.3639461295106157e-05, | |
| "loss": 0.5895624160766602, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.2400648824006488, | |
| "grad_norm": 0.5462051630020142, | |
| "learning_rate": 1.3626138770259765e-05, | |
| "loss": 0.5515483617782593, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.2413625304136253, | |
| "grad_norm": 0.5416935682296753, | |
| "learning_rate": 1.3612808830877377e-05, | |
| "loss": 0.5839380621910095, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.2426601784266018, | |
| "grad_norm": 0.543431282043457, | |
| "learning_rate": 1.3599471504215347e-05, | |
| "loss": 0.6129022836685181, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.2439578264395783, | |
| "grad_norm": 0.5546287894248962, | |
| "learning_rate": 1.358612681754513e-05, | |
| "loss": 0.5957478284835815, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.2452554744525548, | |
| "grad_norm": 0.5636503100395203, | |
| "learning_rate": 1.357277479815324e-05, | |
| "loss": 0.6206330060958862, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.2465531224655313, | |
| "grad_norm": 0.5537446141242981, | |
| "learning_rate": 1.355941547334117e-05, | |
| "loss": 0.5747988224029541, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.2478507704785078, | |
| "grad_norm": 0.5459409952163696, | |
| "learning_rate": 1.3546048870425356e-05, | |
| "loss": 0.5868381261825562, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.2491484184914843, | |
| "grad_norm": 0.5428374409675598, | |
| "learning_rate": 1.3532675016737127e-05, | |
| "loss": 0.6297606825828552, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.2504460665044608, | |
| "grad_norm": 0.5484406352043152, | |
| "learning_rate": 1.3519293939622622e-05, | |
| "loss": 0.6754599213600159, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.251743714517437, | |
| "grad_norm": 0.5630886554718018, | |
| "learning_rate": 1.3505905666442757e-05, | |
| "loss": 0.655160129070282, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.2530413625304138, | |
| "grad_norm": 0.5442233085632324, | |
| "learning_rate": 1.3492510224573165e-05, | |
| "loss": 0.5808818936347961, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.25433901054339, | |
| "grad_norm": 0.5171942114830017, | |
| "learning_rate": 1.3479107641404134e-05, | |
| "loss": 0.5760788321495056, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.2556366585563665, | |
| "grad_norm": 0.5334968566894531, | |
| "learning_rate": 1.3465697944340552e-05, | |
| "loss": 0.5447085499763489, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.256934306569343, | |
| "grad_norm": 0.5165731310844421, | |
| "learning_rate": 1.3452281160801856e-05, | |
| "loss": 0.600307822227478, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.2582319545823195, | |
| "grad_norm": 0.5485058426856995, | |
| "learning_rate": 1.3438857318221974e-05, | |
| "loss": 0.6196280717849731, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.259529602595296, | |
| "grad_norm": 0.5499110817909241, | |
| "learning_rate": 1.3425426444049265e-05, | |
| "loss": 0.6000030040740967, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.2608272506082725, | |
| "grad_norm": 0.5815853476524353, | |
| "learning_rate": 1.3411988565746467e-05, | |
| "loss": 0.6568498611450195, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.262124898621249, | |
| "grad_norm": 0.5364983081817627, | |
| "learning_rate": 1.3398543710790642e-05, | |
| "loss": 0.6078934073448181, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.2634225466342255, | |
| "grad_norm": 0.5517644286155701, | |
| "learning_rate": 1.3385091906673115e-05, | |
| "loss": 0.6221879720687866, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.264720194647202, | |
| "grad_norm": 0.5543562769889832, | |
| "learning_rate": 1.3371633180899417e-05, | |
| "loss": 0.6666390895843506, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.2660178426601785, | |
| "grad_norm": 0.5409432053565979, | |
| "learning_rate": 1.335816756098924e-05, | |
| "loss": 0.6188746690750122, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.267315490673155, | |
| "grad_norm": 0.590812087059021, | |
| "learning_rate": 1.3344695074476365e-05, | |
| "loss": 0.6498491764068604, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.2686131386861315, | |
| "grad_norm": 0.5648714900016785, | |
| "learning_rate": 1.3331215748908622e-05, | |
| "loss": 0.6376237869262695, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.269910786699108, | |
| "grad_norm": 0.5377125144004822, | |
| "learning_rate": 1.3317729611847818e-05, | |
| "loss": 0.6080333590507507, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.2712084347120842, | |
| "grad_norm": 0.6160985231399536, | |
| "learning_rate": 1.3304236690869688e-05, | |
| "loss": 0.6452457904815674, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.272506082725061, | |
| "grad_norm": 0.5675063133239746, | |
| "learning_rate": 1.329073701356384e-05, | |
| "loss": 0.6066033840179443, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.2738037307380372, | |
| "grad_norm": 0.5339285731315613, | |
| "learning_rate": 1.3277230607533698e-05, | |
| "loss": 0.563126266002655, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.275101378751014, | |
| "grad_norm": 0.558273434638977, | |
| "learning_rate": 1.3263717500396446e-05, | |
| "loss": 0.6070864796638489, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.2763990267639902, | |
| "grad_norm": 0.5663204789161682, | |
| "learning_rate": 1.3250197719782966e-05, | |
| "loss": 0.6016590595245361, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.2776966747769667, | |
| "grad_norm": 0.5561959743499756, | |
| "learning_rate": 1.3236671293337788e-05, | |
| "loss": 0.6111094951629639, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.2789943227899432, | |
| "grad_norm": 0.5440069437026978, | |
| "learning_rate": 1.3223138248719032e-05, | |
| "loss": 0.6232655644416809, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.2802919708029197, | |
| "grad_norm": 0.5603107810020447, | |
| "learning_rate": 1.3209598613598344e-05, | |
| "loss": 0.5950015783309937, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.2815896188158962, | |
| "grad_norm": 0.538038969039917, | |
| "learning_rate": 1.3196052415660856e-05, | |
| "loss": 0.6100248098373413, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.2828872668288727, | |
| "grad_norm": 0.5667180418968201, | |
| "learning_rate": 1.318249968260511e-05, | |
| "loss": 0.6681912541389465, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.2841849148418492, | |
| "grad_norm": 0.5527055859565735, | |
| "learning_rate": 1.316894044214302e-05, | |
| "loss": 0.6051948070526123, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.2854825628548256, | |
| "grad_norm": 0.5413651466369629, | |
| "learning_rate": 1.3155374721999797e-05, | |
| "loss": 0.5882329940795898, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.2867802108678021, | |
| "grad_norm": 0.5323876738548279, | |
| "learning_rate": 1.3141802549913907e-05, | |
| "loss": 0.6183469295501709, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.2880778588807786, | |
| "grad_norm": 0.5273195505142212, | |
| "learning_rate": 1.3128223953637003e-05, | |
| "loss": 0.5676054954528809, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.2893755068937551, | |
| "grad_norm": 0.567756175994873, | |
| "learning_rate": 1.3114638960933883e-05, | |
| "loss": 0.6798044443130493, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.2906731549067316, | |
| "grad_norm": 0.5517603754997253, | |
| "learning_rate": 1.3101047599582415e-05, | |
| "loss": 0.6340286731719971, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.2919708029197081, | |
| "grad_norm": 0.5477331280708313, | |
| "learning_rate": 1.3087449897373494e-05, | |
| "loss": 0.6021038889884949, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.2932684509326844, | |
| "grad_norm": 0.551368772983551, | |
| "learning_rate": 1.307384588211098e-05, | |
| "loss": 0.5940453410148621, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.294566098945661, | |
| "grad_norm": 0.5456337928771973, | |
| "learning_rate": 1.306023558161164e-05, | |
| "loss": 0.6023222208023071, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.2958637469586374, | |
| "grad_norm": 0.5676029324531555, | |
| "learning_rate": 1.3046619023705095e-05, | |
| "loss": 0.6922143697738647, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.2971613949716139, | |
| "grad_norm": 0.5776983499526978, | |
| "learning_rate": 1.3032996236233756e-05, | |
| "loss": 0.6589181423187256, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2984590429845904, | |
| "grad_norm": 0.5594776272773743, | |
| "learning_rate": 1.3019367247052781e-05, | |
| "loss": 0.6284008622169495, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.2997566909975669, | |
| "grad_norm": 0.5632730722427368, | |
| "learning_rate": 1.300573208403e-05, | |
| "loss": 0.586546778678894, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.3010543390105433, | |
| "grad_norm": 0.5418180823326111, | |
| "learning_rate": 1.2992090775045868e-05, | |
| "loss": 0.5931944847106934, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.3023519870235198, | |
| "grad_norm": 0.5260592699050903, | |
| "learning_rate": 1.2978443347993415e-05, | |
| "loss": 0.5439613461494446, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.3036496350364963, | |
| "grad_norm": 0.546437680721283, | |
| "learning_rate": 1.296478983077817e-05, | |
| "loss": 0.5946912169456482, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.3049472830494728, | |
| "grad_norm": 0.5575598478317261, | |
| "learning_rate": 1.2951130251318125e-05, | |
| "loss": 0.6190862655639648, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.3062449310624493, | |
| "grad_norm": 0.5441600680351257, | |
| "learning_rate": 1.2937464637543655e-05, | |
| "loss": 0.613700270652771, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.3075425790754258, | |
| "grad_norm": 0.5194239020347595, | |
| "learning_rate": 1.2923793017397488e-05, | |
| "loss": 0.551931619644165, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.3088402270884023, | |
| "grad_norm": 0.521641194820404, | |
| "learning_rate": 1.2910115418834624e-05, | |
| "loss": 0.544873833656311, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.3101378751013788, | |
| "grad_norm": 0.5697146654129028, | |
| "learning_rate": 1.289643186982229e-05, | |
| "loss": 0.6762262582778931, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.3114355231143553, | |
| "grad_norm": 0.5358358025550842, | |
| "learning_rate": 1.2882742398339884e-05, | |
| "loss": 0.5811675190925598, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.3127331711273318, | |
| "grad_norm": 0.5812531113624573, | |
| "learning_rate": 1.2869047032378905e-05, | |
| "loss": 0.6202974319458008, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.3140308191403083, | |
| "grad_norm": 0.5383328795433044, | |
| "learning_rate": 1.2855345799942915e-05, | |
| "loss": 0.58216392993927, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.3153284671532846, | |
| "grad_norm": 0.5470954775810242, | |
| "learning_rate": 1.2841638729047463e-05, | |
| "loss": 0.5842857360839844, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.3166261151662613, | |
| "grad_norm": 0.5181686878204346, | |
| "learning_rate": 1.2827925847720041e-05, | |
| "loss": 0.5985524654388428, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.3179237631792375, | |
| "grad_norm": 0.5179515480995178, | |
| "learning_rate": 1.2814207184000018e-05, | |
| "loss": 0.5709914565086365, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.319221411192214, | |
| "grad_norm": 0.5449542999267578, | |
| "learning_rate": 1.2800482765938594e-05, | |
| "loss": 0.646975576877594, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.3205190592051905, | |
| "grad_norm": 0.5302087664604187, | |
| "learning_rate": 1.2786752621598726e-05, | |
| "loss": 0.6145081520080566, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.321816707218167, | |
| "grad_norm": 0.5520698428153992, | |
| "learning_rate": 1.2773016779055089e-05, | |
| "loss": 0.5821577906608582, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.3231143552311435, | |
| "grad_norm": 0.5411002039909363, | |
| "learning_rate": 1.2759275266393998e-05, | |
| "loss": 0.5899526476860046, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.32441200324412, | |
| "grad_norm": 0.5193924307823181, | |
| "learning_rate": 1.2745528111713373e-05, | |
| "loss": 0.5851880311965942, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.3257096512570965, | |
| "grad_norm": 0.5581620931625366, | |
| "learning_rate": 1.2731775343122663e-05, | |
| "loss": 0.6368898153305054, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.327007299270073, | |
| "grad_norm": 0.5761281847953796, | |
| "learning_rate": 1.2718016988742799e-05, | |
| "loss": 0.6208426356315613, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.3283049472830495, | |
| "grad_norm": 0.5429732799530029, | |
| "learning_rate": 1.270425307670614e-05, | |
| "loss": 0.5906336307525635, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.329602595296026, | |
| "grad_norm": 0.5482628345489502, | |
| "learning_rate": 1.2690483635156392e-05, | |
| "loss": 0.6205004453659058, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.3309002433090025, | |
| "grad_norm": 0.53929603099823, | |
| "learning_rate": 1.2676708692248583e-05, | |
| "loss": 0.5814516544342041, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.332197891321979, | |
| "grad_norm": 0.5420404076576233, | |
| "learning_rate": 1.2662928276148985e-05, | |
| "loss": 0.6052178740501404, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.3334955393349555, | |
| "grad_norm": 0.5524218678474426, | |
| "learning_rate": 1.264914241503506e-05, | |
| "loss": 0.639128565788269, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.334793187347932, | |
| "grad_norm": 0.5308884978294373, | |
| "learning_rate": 1.2635351137095408e-05, | |
| "loss": 0.5758256316184998, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.3360908353609084, | |
| "grad_norm": 0.556959867477417, | |
| "learning_rate": 1.2621554470529698e-05, | |
| "loss": 0.6215351223945618, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.3373884833738847, | |
| "grad_norm": 0.5299232006072998, | |
| "learning_rate": 1.2607752443548622e-05, | |
| "loss": 0.6064879298210144, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.3386861313868614, | |
| "grad_norm": 0.5557371973991394, | |
| "learning_rate": 1.259394508437383e-05, | |
| "loss": 0.62589031457901, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.3399837793998377, | |
| "grad_norm": 0.5563995242118835, | |
| "learning_rate": 1.2580132421237883e-05, | |
| "loss": 0.6236660480499268, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.3412814274128142, | |
| "grad_norm": 0.5666968822479248, | |
| "learning_rate": 1.2566314482384174e-05, | |
| "loss": 0.6252362728118896, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.3425790754257907, | |
| "grad_norm": 0.5652741193771362, | |
| "learning_rate": 1.2552491296066895e-05, | |
| "loss": 0.6189643144607544, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.3438767234387672, | |
| "grad_norm": 0.5583733320236206, | |
| "learning_rate": 1.2538662890550959e-05, | |
| "loss": 0.6765375137329102, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.3451743714517437, | |
| "grad_norm": 0.5742061138153076, | |
| "learning_rate": 1.252482929411196e-05, | |
| "loss": 0.6477082967758179, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.3464720194647202, | |
| "grad_norm": 0.5400403141975403, | |
| "learning_rate": 1.25109905350361e-05, | |
| "loss": 0.5811231136322021, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.3477696674776967, | |
| "grad_norm": 0.5390773415565491, | |
| "learning_rate": 1.249714664162014e-05, | |
| "loss": 0.6055101156234741, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.3490673154906732, | |
| "grad_norm": 0.5596996545791626, | |
| "learning_rate": 1.2483297642171332e-05, | |
| "loss": 0.6074774265289307, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.3503649635036497, | |
| "grad_norm": 0.5600677728652954, | |
| "learning_rate": 1.246944356500738e-05, | |
| "loss": 0.6564399003982544, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.3516626115166261, | |
| "grad_norm": 0.5470819473266602, | |
| "learning_rate": 1.2455584438456366e-05, | |
| "loss": 0.6430810689926147, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.3529602595296026, | |
| "grad_norm": 0.5539683699607849, | |
| "learning_rate": 1.2441720290856694e-05, | |
| "loss": 0.6132862567901611, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.3542579075425791, | |
| "grad_norm": 0.5648192167282104, | |
| "learning_rate": 1.2427851150557036e-05, | |
| "loss": 0.6304311156272888, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.3555555555555556, | |
| "grad_norm": 0.5195255279541016, | |
| "learning_rate": 1.241397704591627e-05, | |
| "loss": 0.5641679763793945, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.3568532035685321, | |
| "grad_norm": 0.5658749341964722, | |
| "learning_rate": 1.2400098005303436e-05, | |
| "loss": 0.6409952044487, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.3581508515815086, | |
| "grad_norm": 0.5088870525360107, | |
| "learning_rate": 1.238621405709766e-05, | |
| "loss": 0.5354233384132385, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.3594484995944849, | |
| "grad_norm": 0.5734469890594482, | |
| "learning_rate": 1.2372325229688093e-05, | |
| "loss": 0.6188406944274902, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.3607461476074616, | |
| "grad_norm": 0.5380412936210632, | |
| "learning_rate": 1.235843155147388e-05, | |
| "loss": 0.5657402873039246, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.3620437956204379, | |
| "grad_norm": 0.5315279960632324, | |
| "learning_rate": 1.2344533050864071e-05, | |
| "loss": 0.5667376518249512, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.3633414436334144, | |
| "grad_norm": 0.5081866979598999, | |
| "learning_rate": 1.2330629756277588e-05, | |
| "loss": 0.5432066917419434, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.3646390916463909, | |
| "grad_norm": 0.5798763036727905, | |
| "learning_rate": 1.2316721696143141e-05, | |
| "loss": 0.6364309191703796, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.3659367396593673, | |
| "grad_norm": 0.5289844870567322, | |
| "learning_rate": 1.23028088988992e-05, | |
| "loss": 0.5321639180183411, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.3672343876723438, | |
| "grad_norm": 0.5852347612380981, | |
| "learning_rate": 1.228889139299391e-05, | |
| "loss": 0.6831628084182739, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.3685320356853203, | |
| "grad_norm": 0.5265390872955322, | |
| "learning_rate": 1.2274969206885048e-05, | |
| "loss": 0.5725244283676147, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.3698296836982968, | |
| "grad_norm": 0.6298306584358215, | |
| "learning_rate": 1.2261042369039966e-05, | |
| "loss": 0.6366633176803589, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.3711273317112733, | |
| "grad_norm": 0.521314263343811, | |
| "learning_rate": 1.2247110907935518e-05, | |
| "loss": 0.5725533962249756, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.3724249797242498, | |
| "grad_norm": 0.5249886512756348, | |
| "learning_rate": 1.2233174852058015e-05, | |
| "loss": 0.577233076095581, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.3737226277372263, | |
| "grad_norm": 0.5558046102523804, | |
| "learning_rate": 1.2219234229903163e-05, | |
| "loss": 0.6044833660125732, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.3750202757502028, | |
| "grad_norm": 0.5569727420806885, | |
| "learning_rate": 1.2205289069976012e-05, | |
| "loss": 0.5831769704818726, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.3763179237631793, | |
| "grad_norm": 0.5547581911087036, | |
| "learning_rate": 1.2191339400790881e-05, | |
| "loss": 0.5798386335372925, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.3776155717761558, | |
| "grad_norm": 0.5544263124465942, | |
| "learning_rate": 1.2177385250871312e-05, | |
| "loss": 0.607170581817627, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.378913219789132, | |
| "grad_norm": 0.5475184321403503, | |
| "learning_rate": 1.2163426648750009e-05, | |
| "loss": 0.596827507019043, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.3802108678021088, | |
| "grad_norm": 0.551906168460846, | |
| "learning_rate": 1.2149463622968782e-05, | |
| "loss": 0.5992593169212341, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.381508515815085, | |
| "grad_norm": 0.5418475270271301, | |
| "learning_rate": 1.2135496202078487e-05, | |
| "loss": 0.5538514852523804, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.3828061638280618, | |
| "grad_norm": 0.5357592105865479, | |
| "learning_rate": 1.2121524414638958e-05, | |
| "loss": 0.6014474630355835, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.384103811841038, | |
| "grad_norm": 0.5673146843910217, | |
| "learning_rate": 1.2107548289218968e-05, | |
| "loss": 0.5835940837860107, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.3854014598540145, | |
| "grad_norm": 0.5655810832977295, | |
| "learning_rate": 1.2093567854396158e-05, | |
| "loss": 0.6108807325363159, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.386699107866991, | |
| "grad_norm": 0.5361012816429138, | |
| "learning_rate": 1.2079583138756976e-05, | |
| "loss": 0.6093813180923462, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.3879967558799675, | |
| "grad_norm": 0.5419613122940063, | |
| "learning_rate": 1.206559417089663e-05, | |
| "loss": 0.6026707887649536, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.389294403892944, | |
| "grad_norm": 0.5429274439811707, | |
| "learning_rate": 1.205160097941901e-05, | |
| "loss": 0.6365257501602173, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.3905920519059205, | |
| "grad_norm": 0.5734850764274597, | |
| "learning_rate": 1.2037603592936656e-05, | |
| "loss": 0.6649122834205627, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.391889699918897, | |
| "grad_norm": 0.5734902024269104, | |
| "learning_rate": 1.2023602040070679e-05, | |
| "loss": 0.7125487327575684, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.3931873479318735, | |
| "grad_norm": 0.5633674263954163, | |
| "learning_rate": 1.2009596349450717e-05, | |
| "loss": 0.6474109292030334, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.39448499594485, | |
| "grad_norm": 0.5378244519233704, | |
| "learning_rate": 1.1995586549714855e-05, | |
| "loss": 0.6136443614959717, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.3957826439578265, | |
| "grad_norm": 0.558250904083252, | |
| "learning_rate": 1.198157266950959e-05, | |
| "loss": 0.6676377058029175, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.397080291970803, | |
| "grad_norm": 0.5315516591072083, | |
| "learning_rate": 1.1967554737489762e-05, | |
| "loss": 0.607810378074646, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.3983779399837795, | |
| "grad_norm": 0.5391795039176941, | |
| "learning_rate": 1.1953532782318491e-05, | |
| "loss": 0.5898000597953796, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.399675587996756, | |
| "grad_norm": 0.5466244220733643, | |
| "learning_rate": 1.1939506832667129e-05, | |
| "loss": 0.5943995118141174, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.4009732360097322, | |
| "grad_norm": 0.5457687973976135, | |
| "learning_rate": 1.1925476917215191e-05, | |
| "loss": 0.6089761257171631, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.402270884022709, | |
| "grad_norm": 0.5727429389953613, | |
| "learning_rate": 1.1911443064650301e-05, | |
| "loss": 0.6369843482971191, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.4035685320356852, | |
| "grad_norm": 0.5765259861946106, | |
| "learning_rate": 1.189740530366814e-05, | |
| "loss": 0.6176037788391113, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.404866180048662, | |
| "grad_norm": 0.5793892741203308, | |
| "learning_rate": 1.1883363662972375e-05, | |
| "loss": 0.6147127747535706, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.4061638280616382, | |
| "grad_norm": 0.5127638578414917, | |
| "learning_rate": 1.1869318171274606e-05, | |
| "loss": 0.5739990472793579, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.4074614760746147, | |
| "grad_norm": 0.5451372861862183, | |
| "learning_rate": 1.1855268857294308e-05, | |
| "loss": 0.6005086898803711, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.4087591240875912, | |
| "grad_norm": 0.5556860566139221, | |
| "learning_rate": 1.1841215749758774e-05, | |
| "loss": 0.6003910303115845, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.4100567721005677, | |
| "grad_norm": 0.5883124470710754, | |
| "learning_rate": 1.182715887740305e-05, | |
| "loss": 0.6721568703651428, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.4113544201135442, | |
| "grad_norm": 0.5330623388290405, | |
| "learning_rate": 1.1813098268969886e-05, | |
| "loss": 0.617790699005127, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.4126520681265207, | |
| "grad_norm": 0.5409324169158936, | |
| "learning_rate": 1.1799033953209664e-05, | |
| "loss": 0.6154944896697998, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.4139497161394972, | |
| "grad_norm": 0.5280669927597046, | |
| "learning_rate": 1.178496595888035e-05, | |
| "loss": 0.6064777970314026, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.4152473641524737, | |
| "grad_norm": 0.5559468269348145, | |
| "learning_rate": 1.1770894314747433e-05, | |
| "loss": 0.6379706263542175, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.4165450121654501, | |
| "grad_norm": 0.5678933262825012, | |
| "learning_rate": 1.1756819049583861e-05, | |
| "loss": 0.5879865288734436, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.4178426601784266, | |
| "grad_norm": 0.5317026972770691, | |
| "learning_rate": 1.1742740192169995e-05, | |
| "loss": 0.6252385377883911, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.4191403081914031, | |
| "grad_norm": 0.5503518581390381, | |
| "learning_rate": 1.1728657771293529e-05, | |
| "loss": 0.5956102013587952, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.4204379562043796, | |
| "grad_norm": 0.5392619967460632, | |
| "learning_rate": 1.171457181574945e-05, | |
| "loss": 0.6110433340072632, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.4217356042173561, | |
| "grad_norm": 0.554594099521637, | |
| "learning_rate": 1.1700482354339972e-05, | |
| "loss": 0.6505380272865295, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.4230332522303324, | |
| "grad_norm": 0.5639646053314209, | |
| "learning_rate": 1.168638941587448e-05, | |
| "loss": 0.6052155494689941, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.424330900243309, | |
| "grad_norm": 0.5569002032279968, | |
| "learning_rate": 1.1672293029169466e-05, | |
| "loss": 0.5856403112411499, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.4256285482562854, | |
| "grad_norm": 0.5615402460098267, | |
| "learning_rate": 1.165819322304847e-05, | |
| "loss": 0.6077978610992432, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.426926196269262, | |
| "grad_norm": 0.5535939931869507, | |
| "learning_rate": 1.164409002634203e-05, | |
| "loss": 0.6245694160461426, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.4282238442822384, | |
| "grad_norm": 0.5362287759780884, | |
| "learning_rate": 1.162998346788761e-05, | |
| "loss": 0.6105297803878784, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.4295214922952149, | |
| "grad_norm": 0.5390259027481079, | |
| "learning_rate": 1.1615873576529556e-05, | |
| "loss": 0.6066164970397949, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.4308191403081914, | |
| "grad_norm": 0.5315901041030884, | |
| "learning_rate": 1.1601760381119022e-05, | |
| "loss": 0.5768907070159912, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.4321167883211678, | |
| "grad_norm": 0.5727961659431458, | |
| "learning_rate": 1.158764391051392e-05, | |
| "loss": 0.6904894113540649, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.4334144363341443, | |
| "grad_norm": 0.5435361862182617, | |
| "learning_rate": 1.1573524193578863e-05, | |
| "loss": 0.5838584899902344, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.4347120843471208, | |
| "grad_norm": 0.5609909296035767, | |
| "learning_rate": 1.1559401259185095e-05, | |
| "loss": 0.6729065775871277, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.4360097323600973, | |
| "grad_norm": 0.5284282565116882, | |
| "learning_rate": 1.1545275136210441e-05, | |
| "loss": 0.5950232744216919, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.4373073803730738, | |
| "grad_norm": 0.603245735168457, | |
| "learning_rate": 1.153114585353925e-05, | |
| "loss": 0.6702573299407959, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.4386050283860503, | |
| "grad_norm": 0.5415088534355164, | |
| "learning_rate": 1.1517013440062326e-05, | |
| "loss": 0.5716216564178467, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.4399026763990268, | |
| "grad_norm": 0.4960046708583832, | |
| "learning_rate": 1.1502877924676881e-05, | |
| "loss": 0.5501525402069092, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.4412003244120033, | |
| "grad_norm": 0.5444253087043762, | |
| "learning_rate": 1.1488739336286467e-05, | |
| "loss": 0.6333913207054138, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.4424979724249798, | |
| "grad_norm": 0.5255866646766663, | |
| "learning_rate": 1.1474597703800915e-05, | |
| "loss": 0.6024140119552612, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.4437956204379563, | |
| "grad_norm": 0.5488544702529907, | |
| "learning_rate": 1.1460453056136285e-05, | |
| "loss": 0.6334477663040161, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.4450932684509326, | |
| "grad_norm": 0.5465590953826904, | |
| "learning_rate": 1.14463054222148e-05, | |
| "loss": 0.6596208810806274, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.4463909164639093, | |
| "grad_norm": 0.5492766499519348, | |
| "learning_rate": 1.1432154830964796e-05, | |
| "loss": 0.6396174430847168, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.4476885644768855, | |
| "grad_norm": 0.5476314425468445, | |
| "learning_rate": 1.1418001311320649e-05, | |
| "loss": 0.6056069135665894, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.4489862124898623, | |
| "grad_norm": 0.5088196396827698, | |
| "learning_rate": 1.1403844892222717e-05, | |
| "loss": 0.5474177002906799, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.4502838605028385, | |
| "grad_norm": 0.5697342753410339, | |
| "learning_rate": 1.1389685602617302e-05, | |
| "loss": 0.6007769107818604, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.451581508515815, | |
| "grad_norm": 0.5281476974487305, | |
| "learning_rate": 1.1375523471456564e-05, | |
| "loss": 0.5913225412368774, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.4528791565287915, | |
| "grad_norm": 0.5619297027587891, | |
| "learning_rate": 1.1361358527698481e-05, | |
| "loss": 0.611336350440979, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.454176804541768, | |
| "grad_norm": 0.531401515007019, | |
| "learning_rate": 1.134719080030677e-05, | |
| "loss": 0.5786083936691284, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.4554744525547445, | |
| "grad_norm": 0.5428561568260193, | |
| "learning_rate": 1.1333020318250854e-05, | |
| "loss": 0.6208731532096863, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.456772100567721, | |
| "grad_norm": 0.5384306311607361, | |
| "learning_rate": 1.131884711050578e-05, | |
| "loss": 0.5843198895454407, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.4580697485806975, | |
| "grad_norm": 0.5160107016563416, | |
| "learning_rate": 1.1304671206052168e-05, | |
| "loss": 0.5473004579544067, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.459367396593674, | |
| "grad_norm": 0.5360195636749268, | |
| "learning_rate": 1.1290492633876164e-05, | |
| "loss": 0.626501202583313, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.4606650446066505, | |
| "grad_norm": 0.5251026749610901, | |
| "learning_rate": 1.1276311422969349e-05, | |
| "loss": 0.5944849848747253, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.461962692619627, | |
| "grad_norm": 0.564008355140686, | |
| "learning_rate": 1.1262127602328712e-05, | |
| "loss": 0.6147276163101196, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.4632603406326035, | |
| "grad_norm": 0.5388748645782471, | |
| "learning_rate": 1.124794120095658e-05, | |
| "loss": 0.5849318504333496, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.46455798864558, | |
| "grad_norm": 0.5595386624336243, | |
| "learning_rate": 1.1233752247860549e-05, | |
| "loss": 0.6283015012741089, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.4658556366585564, | |
| "grad_norm": 0.5528329014778137, | |
| "learning_rate": 1.1219560772053442e-05, | |
| "loss": 0.6135470867156982, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.4671532846715327, | |
| "grad_norm": 0.5480870008468628, | |
| "learning_rate": 1.1205366802553231e-05, | |
| "loss": 0.579879879951477, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.4684509326845094, | |
| "grad_norm": 0.6012369990348816, | |
| "learning_rate": 1.1191170368382992e-05, | |
| "loss": 0.67568039894104, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.4697485806974857, | |
| "grad_norm": 0.5386692881584167, | |
| "learning_rate": 1.117697149857084e-05, | |
| "loss": 0.6155050992965698, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.4710462287104624, | |
| "grad_norm": 0.540510892868042, | |
| "learning_rate": 1.1162770222149873e-05, | |
| "loss": 0.6193840503692627, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.4723438767234387, | |
| "grad_norm": 0.5231954455375671, | |
| "learning_rate": 1.1148566568158099e-05, | |
| "loss": 0.5806912183761597, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.4736415247364152, | |
| "grad_norm": 0.5371982455253601, | |
| "learning_rate": 1.1134360565638402e-05, | |
| "loss": 0.6294920444488525, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.4749391727493917, | |
| "grad_norm": 0.5294065475463867, | |
| "learning_rate": 1.1120152243638457e-05, | |
| "loss": 0.6405944228172302, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.4762368207623682, | |
| "grad_norm": 0.5396026372909546, | |
| "learning_rate": 1.1105941631210694e-05, | |
| "loss": 0.622348427772522, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.4775344687753447, | |
| "grad_norm": 0.5184268951416016, | |
| "learning_rate": 1.1091728757412212e-05, | |
| "loss": 0.5783290863037109, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.4788321167883212, | |
| "grad_norm": 0.5296680331230164, | |
| "learning_rate": 1.107751365130474e-05, | |
| "loss": 0.5765876770019531, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.4801297648012977, | |
| "grad_norm": 0.5528906583786011, | |
| "learning_rate": 1.1063296341954577e-05, | |
| "loss": 0.5958802700042725, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.4814274128142741, | |
| "grad_norm": 0.549384355545044, | |
| "learning_rate": 1.1049076858432517e-05, | |
| "loss": 0.6524186730384827, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.4827250608272506, | |
| "grad_norm": 0.5553792119026184, | |
| "learning_rate": 1.1034855229813812e-05, | |
| "loss": 0.63478684425354, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.4840227088402271, | |
| "grad_norm": 0.5639452934265137, | |
| "learning_rate": 1.1020631485178084e-05, | |
| "loss": 0.6482947468757629, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.4853203568532036, | |
| "grad_norm": 0.5332263708114624, | |
| "learning_rate": 1.1006405653609295e-05, | |
| "loss": 0.6563082337379456, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.4866180048661801, | |
| "grad_norm": 0.5505067110061646, | |
| "learning_rate": 1.0992177764195671e-05, | |
| "loss": 0.6217901706695557, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.4879156528791566, | |
| "grad_norm": 0.5751034021377563, | |
| "learning_rate": 1.0977947846029642e-05, | |
| "loss": 0.618269681930542, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.4892133008921329, | |
| "grad_norm": 0.5259911417961121, | |
| "learning_rate": 1.0963715928207795e-05, | |
| "loss": 0.5809241533279419, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.4905109489051096, | |
| "grad_norm": 0.5405173301696777, | |
| "learning_rate": 1.094948203983079e-05, | |
| "loss": 0.6440936923027039, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.4918085969180859, | |
| "grad_norm": 0.5359426736831665, | |
| "learning_rate": 1.0935246210003334e-05, | |
| "loss": 0.5997065305709839, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4918085969180859, | |
| "eval_loss": 0.6832194328308105, | |
| "eval_runtime": 72.4893, | |
| "eval_samples_per_second": 71.624, | |
| "eval_steps_per_second": 8.953, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4931062449310626, | |
| "grad_norm": 0.545395016670227, | |
| "learning_rate": 1.0921008467834094e-05, | |
| "loss": 0.6377010345458984, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.4944038929440389, | |
| "grad_norm": 0.553674578666687, | |
| "learning_rate": 1.0906768842435647e-05, | |
| "loss": 0.6331782937049866, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.4957015409570154, | |
| "grad_norm": 0.5127398371696472, | |
| "learning_rate": 1.0892527362924426e-05, | |
| "loss": 0.5681911110877991, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.4969991889699918, | |
| "grad_norm": 0.5308411717414856, | |
| "learning_rate": 1.0878284058420647e-05, | |
| "loss": 0.6325392127037048, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.4982968369829683, | |
| "grad_norm": 0.5330897569656372, | |
| "learning_rate": 1.0864038958048267e-05, | |
| "loss": 0.5603891611099243, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.4995944849959448, | |
| "grad_norm": 0.5287606716156006, | |
| "learning_rate": 1.084979209093491e-05, | |
| "loss": 0.5920351147651672, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.5008921330089213, | |
| "grad_norm": 0.5484432578086853, | |
| "learning_rate": 1.0835543486211815e-05, | |
| "loss": 0.6529064178466797, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.5021897810218978, | |
| "grad_norm": 0.5554434061050415, | |
| "learning_rate": 1.0821293173013769e-05, | |
| "loss": 0.6203141212463379, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.5034874290348743, | |
| "grad_norm": 0.4985191226005554, | |
| "learning_rate": 1.0807041180479054e-05, | |
| "loss": 0.5167315006256104, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.5047850770478508, | |
| "grad_norm": 0.5687364339828491, | |
| "learning_rate": 1.0792787537749392e-05, | |
| "loss": 0.6727509498596191, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.5060827250608273, | |
| "grad_norm": 0.5391871333122253, | |
| "learning_rate": 1.0778532273969877e-05, | |
| "loss": 0.5891563892364502, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.5073803730738038, | |
| "grad_norm": 0.5688561201095581, | |
| "learning_rate": 1.0764275418288908e-05, | |
| "loss": 0.6336361169815063, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.50867802108678, | |
| "grad_norm": 0.5307201743125916, | |
| "learning_rate": 1.0750016999858151e-05, | |
| "loss": 0.6088765263557434, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.5099756690997568, | |
| "grad_norm": 0.5417827367782593, | |
| "learning_rate": 1.0735757047832461e-05, | |
| "loss": 0.6234108209609985, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.511273317112733, | |
| "grad_norm": 0.5165390968322754, | |
| "learning_rate": 1.0721495591369832e-05, | |
| "loss": 0.5378797054290771, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.5125709651257098, | |
| "grad_norm": 0.5508493781089783, | |
| "learning_rate": 1.0707232659631333e-05, | |
| "loss": 0.6575205326080322, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.513868613138686, | |
| "grad_norm": 0.5701325535774231, | |
| "learning_rate": 1.0692968281781046e-05, | |
| "loss": 0.5776763558387756, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.5151662611516628, | |
| "grad_norm": 0.5180992484092712, | |
| "learning_rate": 1.0678702486986016e-05, | |
| "loss": 0.5627498626708984, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.516463909164639, | |
| "grad_norm": 0.5465271472930908, | |
| "learning_rate": 1.0664435304416185e-05, | |
| "loss": 0.5880453586578369, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.5177615571776155, | |
| "grad_norm": 0.5629556775093079, | |
| "learning_rate": 1.065016676324433e-05, | |
| "loss": 0.6594117879867554, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.519059205190592, | |
| "grad_norm": 0.5278184413909912, | |
| "learning_rate": 1.0635896892645998e-05, | |
| "loss": 0.5453213453292847, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.5203568532035685, | |
| "grad_norm": 0.5409108400344849, | |
| "learning_rate": 1.0621625721799473e-05, | |
| "loss": 0.6020928025245667, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.521654501216545, | |
| "grad_norm": 0.5297386050224304, | |
| "learning_rate": 1.0607353279885682e-05, | |
| "loss": 0.581575870513916, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.5229521492295215, | |
| "grad_norm": 0.5326167345046997, | |
| "learning_rate": 1.0593079596088155e-05, | |
| "loss": 0.5731886029243469, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.524249797242498, | |
| "grad_norm": 0.5496317148208618, | |
| "learning_rate": 1.0578804699592968e-05, | |
| "loss": 0.6127786636352539, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.5255474452554745, | |
| "grad_norm": 0.5222692489624023, | |
| "learning_rate": 1.0564528619588668e-05, | |
| "loss": 0.5508180856704712, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.526845093268451, | |
| "grad_norm": 0.5078931450843811, | |
| "learning_rate": 1.0550251385266223e-05, | |
| "loss": 0.590618908405304, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.5281427412814275, | |
| "grad_norm": 0.545173704624176, | |
| "learning_rate": 1.0535973025818969e-05, | |
| "loss": 0.5988805294036865, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.529440389294404, | |
| "grad_norm": 0.5643585920333862, | |
| "learning_rate": 1.0521693570442533e-05, | |
| "loss": 0.6470606327056885, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.5307380373073802, | |
| "grad_norm": 0.5382372140884399, | |
| "learning_rate": 1.050741304833479e-05, | |
| "loss": 0.6253216862678528, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.532035685320357, | |
| "grad_norm": 0.527792751789093, | |
| "learning_rate": 1.0493131488695789e-05, | |
| "loss": 0.5740289092063904, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.5333333333333332, | |
| "grad_norm": 0.5286063551902771, | |
| "learning_rate": 1.0478848920727707e-05, | |
| "loss": 0.5898089408874512, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.53463098134631, | |
| "grad_norm": 0.5210081338882446, | |
| "learning_rate": 1.0464565373634784e-05, | |
| "loss": 0.5460256338119507, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.5359286293592862, | |
| "grad_norm": 0.542233943939209, | |
| "learning_rate": 1.0450280876623253e-05, | |
| "loss": 0.6149614453315735, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.537226277372263, | |
| "grad_norm": 0.5287345051765442, | |
| "learning_rate": 1.0435995458901298e-05, | |
| "loss": 0.5987131595611572, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.5385239253852392, | |
| "grad_norm": 0.542398989200592, | |
| "learning_rate": 1.042170914967898e-05, | |
| "loss": 0.5659464001655579, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.5398215733982157, | |
| "grad_norm": 0.5581417679786682, | |
| "learning_rate": 1.0407421978168186e-05, | |
| "loss": 0.648675262928009, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.5411192214111922, | |
| "grad_norm": 0.542323112487793, | |
| "learning_rate": 1.0393133973582572e-05, | |
| "loss": 0.6466338634490967, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.5424168694241687, | |
| "grad_norm": 0.5204232335090637, | |
| "learning_rate": 1.0378845165137483e-05, | |
| "loss": 0.5785092115402222, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.5437145174371452, | |
| "grad_norm": 0.5261425375938416, | |
| "learning_rate": 1.0364555582049917e-05, | |
| "loss": 0.6130785346031189, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.5450121654501217, | |
| "grad_norm": 0.5651884078979492, | |
| "learning_rate": 1.0350265253538458e-05, | |
| "loss": 0.6042903661727905, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.5463098134630981, | |
| "grad_norm": 0.5569320917129517, | |
| "learning_rate": 1.033597420882321e-05, | |
| "loss": 0.6515809297561646, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.5476074614760746, | |
| "grad_norm": 0.5539842844009399, | |
| "learning_rate": 1.0321682477125743e-05, | |
| "loss": 0.6051802039146423, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.5489051094890511, | |
| "grad_norm": 0.5327019691467285, | |
| "learning_rate": 1.0307390087669026e-05, | |
| "loss": 0.5866248607635498, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.5502027575020276, | |
| "grad_norm": 0.5504518151283264, | |
| "learning_rate": 1.0293097069677382e-05, | |
| "loss": 0.6087076663970947, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.5515004055150041, | |
| "grad_norm": 0.5322021842002869, | |
| "learning_rate": 1.0278803452376416e-05, | |
| "loss": 0.5527307391166687, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.5527980535279804, | |
| "grad_norm": 0.5314878821372986, | |
| "learning_rate": 1.0264509264992954e-05, | |
| "loss": 0.623512327671051, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.554095701540957, | |
| "grad_norm": 0.5596524477005005, | |
| "learning_rate": 1.0250214536754996e-05, | |
| "loss": 0.6276538372039795, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.5553933495539334, | |
| "grad_norm": 0.5265888571739197, | |
| "learning_rate": 1.0235919296891641e-05, | |
| "loss": 0.5611189603805542, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.55669099756691, | |
| "grad_norm": 0.5899763107299805, | |
| "learning_rate": 1.0221623574633035e-05, | |
| "loss": 0.6541014909744263, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.5579886455798864, | |
| "grad_norm": 0.545138955116272, | |
| "learning_rate": 1.0207327399210311e-05, | |
| "loss": 0.5935692191123962, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.559286293592863, | |
| "grad_norm": 0.5380452871322632, | |
| "learning_rate": 1.0193030799855534e-05, | |
| "loss": 0.5741644501686096, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.5605839416058394, | |
| "grad_norm": 0.5540161728858948, | |
| "learning_rate": 1.0178733805801626e-05, | |
| "loss": 0.625443696975708, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.5618815896188158, | |
| "grad_norm": 0.5784110426902771, | |
| "learning_rate": 1.0164436446282324e-05, | |
| "loss": 0.6342917680740356, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.5631792376317923, | |
| "grad_norm": 0.5346982479095459, | |
| "learning_rate": 1.015013875053211e-05, | |
| "loss": 0.5571820735931396, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.5644768856447688, | |
| "grad_norm": 0.5152148008346558, | |
| "learning_rate": 1.013584074778615e-05, | |
| "loss": 0.5197643041610718, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.5657745336577453, | |
| "grad_norm": 0.5702791213989258, | |
| "learning_rate": 1.0121542467280245e-05, | |
| "loss": 0.6099081635475159, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.5670721816707218, | |
| "grad_norm": 0.5424299836158752, | |
| "learning_rate": 1.0107243938250755e-05, | |
| "loss": 0.5385927557945251, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.5683698296836983, | |
| "grad_norm": 0.5413081049919128, | |
| "learning_rate": 1.0092945189934558e-05, | |
| "loss": 0.6308001279830933, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.5696674776966748, | |
| "grad_norm": 0.5650938749313354, | |
| "learning_rate": 1.007864625156897e-05, | |
| "loss": 0.656417965888977, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.5709651257096513, | |
| "grad_norm": 0.5578048229217529, | |
| "learning_rate": 1.0064347152391703e-05, | |
| "loss": 0.5987565517425537, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.5722627737226276, | |
| "grad_norm": 0.5425694584846497, | |
| "learning_rate": 1.0050047921640797e-05, | |
| "loss": 0.5794038772583008, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.5735604217356043, | |
| "grad_norm": 0.5536248087882996, | |
| "learning_rate": 1.003574858855456e-05, | |
| "loss": 0.6126576066017151, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.5748580697485806, | |
| "grad_norm": 0.5221614837646484, | |
| "learning_rate": 1.0021449182371504e-05, | |
| "loss": 0.5808907747268677, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.5761557177615573, | |
| "grad_norm": 0.5314812660217285, | |
| "learning_rate": 1.0007149732330299e-05, | |
| "loss": 0.5740360021591187, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.5774533657745335, | |
| "grad_norm": 0.556327223777771, | |
| "learning_rate": 9.992850267669703e-06, | |
| "loss": 0.6449018716812134, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.5787510137875103, | |
| "grad_norm": 0.5447148680686951, | |
| "learning_rate": 9.978550817628501e-06, | |
| "loss": 0.5590343475341797, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.5800486618004865, | |
| "grad_norm": 0.5570490956306458, | |
| "learning_rate": 9.964251411445444e-06, | |
| "loss": 0.6283855438232422, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.5813463098134632, | |
| "grad_norm": 0.5475562214851379, | |
| "learning_rate": 9.949952078359208e-06, | |
| "loss": 0.6058873534202576, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.5826439578264395, | |
| "grad_norm": 0.5271614789962769, | |
| "learning_rate": 9.935652847608302e-06, | |
| "loss": 0.6080070734024048, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.583941605839416, | |
| "grad_norm": 0.5340768098831177, | |
| "learning_rate": 9.921353748431036e-06, | |
| "loss": 0.5789950489997864, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.5852392538523925, | |
| "grad_norm": 0.5284969806671143, | |
| "learning_rate": 9.907054810065446e-06, | |
| "loss": 0.5514812469482422, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.586536901865369, | |
| "grad_norm": 0.5400740504264832, | |
| "learning_rate": 9.89275606174925e-06, | |
| "loss": 0.5774392485618591, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.5878345498783455, | |
| "grad_norm": 0.5264250040054321, | |
| "learning_rate": 9.878457532719757e-06, | |
| "loss": 0.5731384754180908, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.589132197891322, | |
| "grad_norm": 0.5703708529472351, | |
| "learning_rate": 9.864159252213852e-06, | |
| "loss": 0.6473686695098877, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.5904298459042985, | |
| "grad_norm": 0.5441808104515076, | |
| "learning_rate": 9.849861249467893e-06, | |
| "loss": 0.6381841897964478, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.591727493917275, | |
| "grad_norm": 0.5486851930618286, | |
| "learning_rate": 9.83556355371768e-06, | |
| "loss": 0.613477349281311, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.5930251419302515, | |
| "grad_norm": 0.5925759673118591, | |
| "learning_rate": 9.821266194198375e-06, | |
| "loss": 0.5966989994049072, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.5943227899432277, | |
| "grad_norm": 0.503745436668396, | |
| "learning_rate": 9.806969200144471e-06, | |
| "loss": 0.5462368726730347, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.5956204379562045, | |
| "grad_norm": 0.525786817073822, | |
| "learning_rate": 9.79267260078969e-06, | |
| "loss": 0.5990958213806152, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.5969180859691807, | |
| "grad_norm": 0.5402313470840454, | |
| "learning_rate": 9.778376425366967e-06, | |
| "loss": 0.6069964170455933, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.5982157339821574, | |
| "grad_norm": 0.566880464553833, | |
| "learning_rate": 9.764080703108362e-06, | |
| "loss": 0.6295340061187744, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.5995133819951337, | |
| "grad_norm": 0.5545258522033691, | |
| "learning_rate": 9.749785463245006e-06, | |
| "loss": 0.6260232925415039, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.6008110300081104, | |
| "grad_norm": 0.5898419618606567, | |
| "learning_rate": 9.735490735007047e-06, | |
| "loss": 0.6146451830863953, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.6021086780210867, | |
| "grad_norm": 0.5249006748199463, | |
| "learning_rate": 9.721196547623585e-06, | |
| "loss": 0.6049670577049255, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.6034063260340634, | |
| "grad_norm": 0.5289062857627869, | |
| "learning_rate": 9.706902930322621e-06, | |
| "loss": 0.6006771326065063, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.6047039740470397, | |
| "grad_norm": 0.5482916235923767, | |
| "learning_rate": 9.692609912330975e-06, | |
| "loss": 0.621732771396637, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.6060016220600162, | |
| "grad_norm": 0.5499362945556641, | |
| "learning_rate": 9.67831752287426e-06, | |
| "loss": 0.6316919922828674, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.6072992700729927, | |
| "grad_norm": 0.5119637250900269, | |
| "learning_rate": 9.66402579117679e-06, | |
| "loss": 0.5918980240821838, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.6085969180859692, | |
| "grad_norm": 0.5473806262016296, | |
| "learning_rate": 9.649734746461544e-06, | |
| "loss": 0.6354460716247559, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.6098945660989457, | |
| "grad_norm": 0.5340628027915955, | |
| "learning_rate": 9.635444417950083e-06, | |
| "loss": 0.5693660378456116, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.6111922141119221, | |
| "grad_norm": 0.5385611653327942, | |
| "learning_rate": 9.62115483486252e-06, | |
| "loss": 0.5467959642410278, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.6124898621248986, | |
| "grad_norm": 0.5278156399726868, | |
| "learning_rate": 9.606866026417431e-06, | |
| "loss": 0.6024355888366699, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.6137875101378751, | |
| "grad_norm": 0.5506213903427124, | |
| "learning_rate": 9.592578021831817e-06, | |
| "loss": 0.6594349145889282, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.6150851581508516, | |
| "grad_norm": 0.5613592267036438, | |
| "learning_rate": 9.578290850321023e-06, | |
| "loss": 0.6147022247314453, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.616382806163828, | |
| "grad_norm": 0.5302473306655884, | |
| "learning_rate": 9.564004541098709e-06, | |
| "loss": 0.5724552869796753, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.6176804541768046, | |
| "grad_norm": 0.5463687777519226, | |
| "learning_rate": 9.549719123376749e-06, | |
| "loss": 0.6859567165374756, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.6189781021897809, | |
| "grad_norm": 0.578063428401947, | |
| "learning_rate": 9.535434626365221e-06, | |
| "loss": 0.654534101486206, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.6202757502027576, | |
| "grad_norm": 0.5842363238334656, | |
| "learning_rate": 9.521151079272295e-06, | |
| "loss": 0.6818944811820984, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.6215733982157339, | |
| "grad_norm": 0.5462816953659058, | |
| "learning_rate": 9.506868511304216e-06, | |
| "loss": 0.5978901386260986, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.6228710462287106, | |
| "grad_norm": 0.5496495962142944, | |
| "learning_rate": 9.492586951665214e-06, | |
| "loss": 0.6664569973945618, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.6241686942416869, | |
| "grad_norm": 0.541262149810791, | |
| "learning_rate": 9.47830642955747e-06, | |
| "loss": 0.5771492719650269, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.6254663422546636, | |
| "grad_norm": 0.5542916655540466, | |
| "learning_rate": 9.464026974181035e-06, | |
| "loss": 0.6377862095832825, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.6267639902676398, | |
| "grad_norm": 0.5212349891662598, | |
| "learning_rate": 9.44974861473378e-06, | |
| "loss": 0.5878604650497437, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.6280616382806163, | |
| "grad_norm": 0.5611302256584167, | |
| "learning_rate": 9.435471380411335e-06, | |
| "loss": 0.636326789855957, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.6293592862935928, | |
| "grad_norm": 0.5258191227912903, | |
| "learning_rate": 9.421195300407035e-06, | |
| "loss": 0.5580926537513733, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.6306569343065693, | |
| "grad_norm": 0.5298276543617249, | |
| "learning_rate": 9.406920403911848e-06, | |
| "loss": 0.6048216819763184, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.6319545823195458, | |
| "grad_norm": 0.5328834056854248, | |
| "learning_rate": 9.392646720114325e-06, | |
| "loss": 0.6379623413085938, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.6332522303325223, | |
| "grad_norm": 0.5315790176391602, | |
| "learning_rate": 9.37837427820053e-06, | |
| "loss": 0.6466155052185059, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.6345498783454988, | |
| "grad_norm": 0.5353376269340515, | |
| "learning_rate": 9.364103107354002e-06, | |
| "loss": 0.5879526138305664, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.6358475263584753, | |
| "grad_norm": 0.5551068186759949, | |
| "learning_rate": 9.349833236755675e-06, | |
| "loss": 0.5988892316818237, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.6371451743714518, | |
| "grad_norm": 0.5331724286079407, | |
| "learning_rate": 9.335564695583816e-06, | |
| "loss": 0.5948902368545532, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.638442822384428, | |
| "grad_norm": 0.54310542345047, | |
| "learning_rate": 9.321297513013987e-06, | |
| "loss": 0.6055219769477844, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.6397404703974048, | |
| "grad_norm": 0.5368586182594299, | |
| "learning_rate": 9.307031718218956e-06, | |
| "loss": 0.6035459637641907, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.641038118410381, | |
| "grad_norm": 0.5460159182548523, | |
| "learning_rate": 9.292767340368672e-06, | |
| "loss": 0.6447773575782776, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.6423357664233578, | |
| "grad_norm": 0.5599712133407593, | |
| "learning_rate": 9.278504408630171e-06, | |
| "loss": 0.6332420110702515, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.643633414436334, | |
| "grad_norm": 0.5388185977935791, | |
| "learning_rate": 9.264242952167544e-06, | |
| "loss": 0.6116797924041748, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.6449310624493108, | |
| "grad_norm": 0.5109002590179443, | |
| "learning_rate": 9.24998300014185e-06, | |
| "loss": 0.628926694393158, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.646228710462287, | |
| "grad_norm": 0.5572671890258789, | |
| "learning_rate": 9.235724581711096e-06, | |
| "loss": 0.5795090794563293, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.6475263584752637, | |
| "grad_norm": 0.777040421962738, | |
| "learning_rate": 9.221467726030126e-06, | |
| "loss": 0.644891083240509, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.64882400648824, | |
| "grad_norm": 0.5158191919326782, | |
| "learning_rate": 9.207212462250611e-06, | |
| "loss": 0.5630925893783569, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.6501216545012165, | |
| "grad_norm": 0.5111160278320312, | |
| "learning_rate": 9.192958819520948e-06, | |
| "loss": 0.5322938561439514, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.651419302514193, | |
| "grad_norm": 0.5043333768844604, | |
| "learning_rate": 9.178706826986236e-06, | |
| "loss": 0.5961562395095825, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.6527169505271695, | |
| "grad_norm": 0.5496838092803955, | |
| "learning_rate": 9.164456513788186e-06, | |
| "loss": 0.6005456447601318, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.654014598540146, | |
| "grad_norm": 0.5577642321586609, | |
| "learning_rate": 9.150207909065093e-06, | |
| "loss": 0.6366305351257324, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.6553122465531225, | |
| "grad_norm": 0.5257747769355774, | |
| "learning_rate": 9.135961041951735e-06, | |
| "loss": 0.5669390559196472, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.656609894566099, | |
| "grad_norm": 0.5349394083023071, | |
| "learning_rate": 9.121715941579358e-06, | |
| "loss": 0.5594930052757263, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.6579075425790755, | |
| "grad_norm": 0.5282658338546753, | |
| "learning_rate": 9.107472637075578e-06, | |
| "loss": 0.6159694194793701, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.659205190592052, | |
| "grad_norm": 0.5608229637145996, | |
| "learning_rate": 9.093231157564357e-06, | |
| "loss": 0.6022686958312988, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.6605028386050282, | |
| "grad_norm": 0.5175761580467224, | |
| "learning_rate": 9.078991532165911e-06, | |
| "loss": 0.5850685834884644, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.661800486618005, | |
| "grad_norm": 0.5338742733001709, | |
| "learning_rate": 9.06475378999667e-06, | |
| "loss": 0.5943388938903809, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.6630981346309812, | |
| "grad_norm": 0.5751469135284424, | |
| "learning_rate": 9.050517960169211e-06, | |
| "loss": 0.6381434798240662, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.664395782643958, | |
| "grad_norm": 0.5597715377807617, | |
| "learning_rate": 9.036284071792212e-06, | |
| "loss": 0.6742138862609863, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.6656934306569342, | |
| "grad_norm": 0.5457910895347595, | |
| "learning_rate": 9.022052153970361e-06, | |
| "loss": 0.6068155169487, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.666991078669911, | |
| "grad_norm": 0.5507814884185791, | |
| "learning_rate": 9.007822235804334e-06, | |
| "loss": 0.6176409125328064, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.6682887266828872, | |
| "grad_norm": 0.5373377203941345, | |
| "learning_rate": 8.993594346390709e-06, | |
| "loss": 0.5884984731674194, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.669586374695864, | |
| "grad_norm": 0.523912787437439, | |
| "learning_rate": 8.979368514821917e-06, | |
| "loss": 0.5794025659561157, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.6708840227088402, | |
| "grad_norm": 0.5313317179679871, | |
| "learning_rate": 8.965144770186192e-06, | |
| "loss": 0.6304433345794678, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.6721816707218167, | |
| "grad_norm": 0.5308225154876709, | |
| "learning_rate": 8.950923141567482e-06, | |
| "loss": 0.5822694301605225, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.6734793187347932, | |
| "grad_norm": 0.5657337307929993, | |
| "learning_rate": 8.936703658045426e-06, | |
| "loss": 0.7206499576568604, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.6747769667477697, | |
| "grad_norm": 0.5842191576957703, | |
| "learning_rate": 8.92248634869526e-06, | |
| "loss": 0.6483322381973267, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.6760746147607462, | |
| "grad_norm": 0.5084115266799927, | |
| "learning_rate": 8.90827124258779e-06, | |
| "loss": 0.60451340675354, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.6773722627737226, | |
| "grad_norm": 0.5080921053886414, | |
| "learning_rate": 8.894058368789308e-06, | |
| "loss": 0.5007386803627014, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.6786699107866991, | |
| "grad_norm": 0.5186359286308289, | |
| "learning_rate": 8.879847756361544e-06, | |
| "loss": 0.5846607685089111, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.6799675587996756, | |
| "grad_norm": 0.5321721434593201, | |
| "learning_rate": 8.8656394343616e-06, | |
| "loss": 0.5854955315589905, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.6812652068126521, | |
| "grad_norm": 0.5577939748764038, | |
| "learning_rate": 8.851433431841904e-06, | |
| "loss": 0.6218785643577576, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.6825628548256284, | |
| "grad_norm": 0.5574389696121216, | |
| "learning_rate": 8.837229777850129e-06, | |
| "loss": 0.639427661895752, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.683860502838605, | |
| "grad_norm": 0.5620577335357666, | |
| "learning_rate": 8.823028501429161e-06, | |
| "loss": 0.6334304809570312, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.6851581508515814, | |
| "grad_norm": 0.5603854656219482, | |
| "learning_rate": 8.808829631617009e-06, | |
| "loss": 0.5796216726303101, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 1.686455798864558, | |
| "grad_norm": 0.5886275172233582, | |
| "learning_rate": 8.79463319744677e-06, | |
| "loss": 0.6645929217338562, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.6877534468775344, | |
| "grad_norm": 0.5587744116783142, | |
| "learning_rate": 8.78043922794656e-06, | |
| "loss": 0.6387877464294434, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 1.689051094890511, | |
| "grad_norm": 0.5619886517524719, | |
| "learning_rate": 8.766247752139453e-06, | |
| "loss": 0.658257007598877, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.6903487429034874, | |
| "grad_norm": 0.5658282041549683, | |
| "learning_rate": 8.752058799043422e-06, | |
| "loss": 0.6349663734436035, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 1.691646390916464, | |
| "grad_norm": 0.5596343874931335, | |
| "learning_rate": 8.737872397671293e-06, | |
| "loss": 0.5926494002342224, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.6929440389294403, | |
| "grad_norm": 0.5565075874328613, | |
| "learning_rate": 8.723688577030655e-06, | |
| "loss": 0.6093648672103882, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.6942416869424168, | |
| "grad_norm": 0.5608682036399841, | |
| "learning_rate": 8.709507366123841e-06, | |
| "loss": 0.6120996475219727, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.6955393349553933, | |
| "grad_norm": 0.5365821719169617, | |
| "learning_rate": 8.695328793947833e-06, | |
| "loss": 0.5509933233261108, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 1.6968369829683698, | |
| "grad_norm": 0.537822961807251, | |
| "learning_rate": 8.681152889494227e-06, | |
| "loss": 0.6313689947128296, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.6981346309813463, | |
| "grad_norm": 0.5853676199913025, | |
| "learning_rate": 8.66697968174915e-06, | |
| "loss": 0.6015232801437378, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 1.6994322789943228, | |
| "grad_norm": 0.5395903587341309, | |
| "learning_rate": 8.652809199693236e-06, | |
| "loss": 0.5783022046089172, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.7007299270072993, | |
| "grad_norm": 0.5408870577812195, | |
| "learning_rate": 8.638641472301524e-06, | |
| "loss": 0.6224579215049744, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 1.7020275750202758, | |
| "grad_norm": 0.5533918142318726, | |
| "learning_rate": 8.624476528543439e-06, | |
| "loss": 0.6317031383514404, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.7033252230332523, | |
| "grad_norm": 0.577556848526001, | |
| "learning_rate": 8.610314397382701e-06, | |
| "loss": 0.6522644758224487, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 1.7046228710462286, | |
| "grad_norm": 0.5453810095787048, | |
| "learning_rate": 8.596155107777288e-06, | |
| "loss": 0.6072216629981995, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.7059205190592053, | |
| "grad_norm": 0.5380662679672241, | |
| "learning_rate": 8.581998688679356e-06, | |
| "loss": 0.6069589853286743, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.7072181670721815, | |
| "grad_norm": 0.5374992489814758, | |
| "learning_rate": 8.567845169035205e-06, | |
| "loss": 0.6239044070243835, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.7085158150851583, | |
| "grad_norm": 0.5366406440734863, | |
| "learning_rate": 8.553694577785201e-06, | |
| "loss": 0.5901238322257996, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 1.7098134630981345, | |
| "grad_norm": 0.5510634779930115, | |
| "learning_rate": 8.539546943863717e-06, | |
| "loss": 0.6066378355026245, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.7111111111111112, | |
| "grad_norm": 0.5579630732536316, | |
| "learning_rate": 8.525402296199089e-06, | |
| "loss": 0.6439074873924255, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 1.7124087591240875, | |
| "grad_norm": 0.5268120765686035, | |
| "learning_rate": 8.511260663713537e-06, | |
| "loss": 0.5521663427352905, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.7137064071370642, | |
| "grad_norm": 0.5076732635498047, | |
| "learning_rate": 8.497122075323122e-06, | |
| "loss": 0.5523797273635864, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 1.7150040551500405, | |
| "grad_norm": 0.5172733068466187, | |
| "learning_rate": 8.482986559937676e-06, | |
| "loss": 0.6011000275611877, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.716301703163017, | |
| "grad_norm": 0.5152168869972229, | |
| "learning_rate": 8.468854146460754e-06, | |
| "loss": 0.5801671743392944, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 1.7175993511759935, | |
| "grad_norm": 0.5168895721435547, | |
| "learning_rate": 8.45472486378956e-06, | |
| "loss": 0.6005280613899231, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.71889699918897, | |
| "grad_norm": 0.571263313293457, | |
| "learning_rate": 8.440598740814909e-06, | |
| "loss": 0.6543586850166321, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.7201946472019465, | |
| "grad_norm": 0.5240177512168884, | |
| "learning_rate": 8.426475806421139e-06, | |
| "loss": 0.613470196723938, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.721492295214923, | |
| "grad_norm": 0.5217388272285461, | |
| "learning_rate": 8.412356089486082e-06, | |
| "loss": 0.5799127817153931, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 1.7227899432278995, | |
| "grad_norm": 0.5473462343215942, | |
| "learning_rate": 8.39823961888098e-06, | |
| "loss": 0.6159072518348694, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.724087591240876, | |
| "grad_norm": 0.9222651124000549, | |
| "learning_rate": 8.384126423470447e-06, | |
| "loss": 0.6260055303573608, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 1.7253852392538525, | |
| "grad_norm": 0.5530563592910767, | |
| "learning_rate": 8.37001653211239e-06, | |
| "loss": 0.5505119562149048, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.7266828872668287, | |
| "grad_norm": 0.5369389653205872, | |
| "learning_rate": 8.355909973657975e-06, | |
| "loss": 0.6139888763427734, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 1.7279805352798054, | |
| "grad_norm": 0.5347586870193481, | |
| "learning_rate": 8.341806776951532e-06, | |
| "loss": 0.6265066862106323, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.7292781832927817, | |
| "grad_norm": 0.545946478843689, | |
| "learning_rate": 8.327706970830537e-06, | |
| "loss": 0.6024926900863647, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 1.7305758313057584, | |
| "grad_norm": 0.5450059771537781, | |
| "learning_rate": 8.313610584125523e-06, | |
| "loss": 0.658405065536499, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.7318734793187347, | |
| "grad_norm": 0.5516889691352844, | |
| "learning_rate": 8.299517645660033e-06, | |
| "loss": 0.5770267248153687, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.7331711273317114, | |
| "grad_norm": 0.557074785232544, | |
| "learning_rate": 8.285428184250554e-06, | |
| "loss": 0.5421329736709595, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.7344687753446877, | |
| "grad_norm": 0.543565571308136, | |
| "learning_rate": 8.271342228706478e-06, | |
| "loss": 0.6527873277664185, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 1.7357664233576642, | |
| "grad_norm": 0.49616673588752747, | |
| "learning_rate": 8.257259807830009e-06, | |
| "loss": 0.5355008840560913, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.7370640713706407, | |
| "grad_norm": 0.5389429330825806, | |
| "learning_rate": 8.243180950416142e-06, | |
| "loss": 0.6072633862495422, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 1.7383617193836172, | |
| "grad_norm": 0.542195737361908, | |
| "learning_rate": 8.22910568525257e-06, | |
| "loss": 0.5909712314605713, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.7396593673965937, | |
| "grad_norm": 0.5480629205703735, | |
| "learning_rate": 8.215034041119655e-06, | |
| "loss": 0.5966728925704956, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 1.7409570154095702, | |
| "grad_norm": 0.5179266929626465, | |
| "learning_rate": 8.200966046790339e-06, | |
| "loss": 0.608291745185852, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.7422546634225466, | |
| "grad_norm": 0.525390625, | |
| "learning_rate": 8.186901731030117e-06, | |
| "loss": 0.6019555330276489, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 1.7435523114355231, | |
| "grad_norm": 0.5716756582260132, | |
| "learning_rate": 8.172841122596951e-06, | |
| "loss": 0.6858773827552795, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.7448499594484996, | |
| "grad_norm": 0.53510981798172, | |
| "learning_rate": 8.158784250241226e-06, | |
| "loss": 0.6193398833274841, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.7461476074614761, | |
| "grad_norm": 0.509371280670166, | |
| "learning_rate": 8.144731142705693e-06, | |
| "loss": 0.5310204029083252, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.7474452554744526, | |
| "grad_norm": 0.520005464553833, | |
| "learning_rate": 8.130681828725394e-06, | |
| "loss": 0.5864765644073486, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 1.748742903487429, | |
| "grad_norm": 0.530784010887146, | |
| "learning_rate": 8.116636337027626e-06, | |
| "loss": 0.5898761749267578, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.7500405515004056, | |
| "grad_norm": 0.528357982635498, | |
| "learning_rate": 8.10259469633186e-06, | |
| "loss": 0.611457347869873, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 1.7513381995133819, | |
| "grad_norm": 0.5243317484855652, | |
| "learning_rate": 8.0885569353497e-06, | |
| "loss": 0.5851372480392456, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.7526358475263586, | |
| "grad_norm": 0.5656478404998779, | |
| "learning_rate": 8.07452308278481e-06, | |
| "loss": 0.6243469715118408, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 1.7539334955393349, | |
| "grad_norm": 0.5173115134239197, | |
| "learning_rate": 8.060493167332874e-06, | |
| "loss": 0.5658408403396606, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.7552311435523116, | |
| "grad_norm": 0.5283849835395813, | |
| "learning_rate": 8.04646721768151e-06, | |
| "loss": 0.6133898496627808, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 1.7565287915652879, | |
| "grad_norm": 0.5533227324485779, | |
| "learning_rate": 8.032445262510241e-06, | |
| "loss": 0.6251792907714844, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.7578264395782643, | |
| "grad_norm": 0.5281651020050049, | |
| "learning_rate": 8.018427330490411e-06, | |
| "loss": 0.5514408349990845, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.7591240875912408, | |
| "grad_norm": 0.5382410883903503, | |
| "learning_rate": 8.004413450285147e-06, | |
| "loss": 0.6591918468475342, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.7604217356042173, | |
| "grad_norm": 0.566716194152832, | |
| "learning_rate": 7.990403650549285e-06, | |
| "loss": 0.6281836628913879, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 1.7617193836171938, | |
| "grad_norm": 0.5423158407211304, | |
| "learning_rate": 7.976397959929324e-06, | |
| "loss": 0.5953754782676697, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.7630170316301703, | |
| "grad_norm": 0.5327609181404114, | |
| "learning_rate": 7.962396407063346e-06, | |
| "loss": 0.6248747110366821, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 1.7643146796431468, | |
| "grad_norm": 0.5314010381698608, | |
| "learning_rate": 7.948399020580995e-06, | |
| "loss": 0.5661095380783081, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.7656123276561233, | |
| "grad_norm": 0.5650714039802551, | |
| "learning_rate": 7.934405829103376e-06, | |
| "loss": 0.6127238869667053, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 1.7669099756690998, | |
| "grad_norm": 0.546101987361908, | |
| "learning_rate": 7.920416861243028e-06, | |
| "loss": 0.5874890089035034, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.7682076236820763, | |
| "grad_norm": 0.5429707169532776, | |
| "learning_rate": 7.906432145603844e-06, | |
| "loss": 0.6140427589416504, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 1.7695052716950528, | |
| "grad_norm": 0.5710042715072632, | |
| "learning_rate": 7.892451710781035e-06, | |
| "loss": 0.612266480922699, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.770802919708029, | |
| "grad_norm": 0.55032879114151, | |
| "learning_rate": 7.878475585361045e-06, | |
| "loss": 0.6138355135917664, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.7721005677210058, | |
| "grad_norm": 0.5812238454818726, | |
| "learning_rate": 7.864503797921518e-06, | |
| "loss": 0.6380466818809509, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.773398215733982, | |
| "grad_norm": 0.5375271439552307, | |
| "learning_rate": 7.850536377031221e-06, | |
| "loss": 0.6307961344718933, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 1.7746958637469588, | |
| "grad_norm": 0.5584734082221985, | |
| "learning_rate": 7.836573351249996e-06, | |
| "loss": 0.6312189698219299, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.775993511759935, | |
| "grad_norm": 0.5133419036865234, | |
| "learning_rate": 7.822614749128692e-06, | |
| "loss": 0.5199952125549316, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 1.7772911597729117, | |
| "grad_norm": 0.5400519371032715, | |
| "learning_rate": 7.808660599209124e-06, | |
| "loss": 0.630193829536438, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.778588807785888, | |
| "grad_norm": 0.5627943277359009, | |
| "learning_rate": 7.794710930023993e-06, | |
| "loss": 0.6233404874801636, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 1.7798864557988645, | |
| "grad_norm": 0.510907769203186, | |
| "learning_rate": 7.78076577009684e-06, | |
| "loss": 0.5262112021446228, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.781184103811841, | |
| "grad_norm": 0.5093023777008057, | |
| "learning_rate": 7.76682514794199e-06, | |
| "loss": 0.5871707201004028, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 1.7824817518248175, | |
| "grad_norm": 0.5214765667915344, | |
| "learning_rate": 7.752889092064484e-06, | |
| "loss": 0.5635697841644287, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.783779399837794, | |
| "grad_norm": 0.5440617799758911, | |
| "learning_rate": 7.738957630960037e-06, | |
| "loss": 0.5805234909057617, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.7850770478507705, | |
| "grad_norm": 0.5365013480186462, | |
| "learning_rate": 7.725030793114952e-06, | |
| "loss": 0.615504801273346, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.786374695863747, | |
| "grad_norm": 0.5464739203453064, | |
| "learning_rate": 7.711108607006094e-06, | |
| "loss": 0.6203770637512207, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 1.7876723438767235, | |
| "grad_norm": 0.5313665866851807, | |
| "learning_rate": 7.697191101100802e-06, | |
| "loss": 0.6234644055366516, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.7889699918897, | |
| "grad_norm": 0.5652154684066772, | |
| "learning_rate": 7.683278303856862e-06, | |
| "loss": 0.6404775977134705, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 1.7902676399026762, | |
| "grad_norm": 0.5399373769760132, | |
| "learning_rate": 7.669370243722415e-06, | |
| "loss": 0.6136540770530701, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.7902676399026762, | |
| "eval_loss": 0.6770720481872559, | |
| "eval_runtime": 72.4181, | |
| "eval_samples_per_second": 71.695, | |
| "eval_steps_per_second": 8.962, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.791565287915653, | |
| "grad_norm": 0.5250906944274902, | |
| "learning_rate": 7.655466949135932e-06, | |
| "loss": 0.6147629022598267, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 1.7928629359286292, | |
| "grad_norm": 0.5089812278747559, | |
| "learning_rate": 7.641568448526122e-06, | |
| "loss": 0.5584423542022705, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.794160583941606, | |
| "grad_norm": 0.53523850440979, | |
| "learning_rate": 7.627674770311909e-06, | |
| "loss": 0.5899471640586853, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 1.7954582319545822, | |
| "grad_norm": 0.5330705642700195, | |
| "learning_rate": 7.613785942902343e-06, | |
| "loss": 0.6054921746253967, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.796755879967559, | |
| "grad_norm": 0.514224648475647, | |
| "learning_rate": 7.599901994696566e-06, | |
| "loss": 0.57494056224823, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.7980535279805352, | |
| "grad_norm": 0.5187469124794006, | |
| "learning_rate": 7.586022954083731e-06, | |
| "loss": 0.5410253405570984, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.799351175993512, | |
| "grad_norm": 0.5295100808143616, | |
| "learning_rate": 7.572148849442971e-06, | |
| "loss": 0.5727859139442444, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 1.8006488240064882, | |
| "grad_norm": 0.5229355692863464, | |
| "learning_rate": 7.5582797091433105e-06, | |
| "loss": 0.5822583436965942, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.8019464720194647, | |
| "grad_norm": 0.5615860223770142, | |
| "learning_rate": 7.544415561543639e-06, | |
| "loss": 0.6505988836288452, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 1.8032441200324412, | |
| "grad_norm": 0.538707971572876, | |
| "learning_rate": 7.5305564349926215e-06, | |
| "loss": 0.5953875184059143, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.8045417680454177, | |
| "grad_norm": 0.5197842717170715, | |
| "learning_rate": 7.516702357828672e-06, | |
| "loss": 0.61934494972229, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 1.8058394160583942, | |
| "grad_norm": 0.49861758947372437, | |
| "learning_rate": 7.502853358379865e-06, | |
| "loss": 0.5522242784500122, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.8071370640713706, | |
| "grad_norm": 0.5618783235549927, | |
| "learning_rate": 7.489009464963903e-06, | |
| "loss": 0.6682146787643433, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 1.8084347120843471, | |
| "grad_norm": 0.9511061906814575, | |
| "learning_rate": 7.475170705888042e-06, | |
| "loss": 0.5893583297729492, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.8097323600973236, | |
| "grad_norm": 0.6068239808082581, | |
| "learning_rate": 7.461337109449045e-06, | |
| "loss": 0.6168926954269409, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 1.8110300081103001, | |
| "grad_norm": 0.517159640789032, | |
| "learning_rate": 7.447508703933109e-06, | |
| "loss": 0.5870746374130249, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.8123276561232764, | |
| "grad_norm": 0.5260257720947266, | |
| "learning_rate": 7.433685517615831e-06, | |
| "loss": 0.6144825220108032, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 1.8136253041362531, | |
| "grad_norm": 0.4919078052043915, | |
| "learning_rate": 7.4198675787621185e-06, | |
| "loss": 0.6141817569732666, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.8149229521492294, | |
| "grad_norm": 0.5349772572517395, | |
| "learning_rate": 7.406054915626172e-06, | |
| "loss": 0.5727092027664185, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 1.816220600162206, | |
| "grad_norm": 0.5762760639190674, | |
| "learning_rate": 7.392247556451382e-06, | |
| "loss": 0.647359311580658, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.8175182481751824, | |
| "grad_norm": 0.5478885769844055, | |
| "learning_rate": 7.378445529470303e-06, | |
| "loss": 0.6371256113052368, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 1.818815896188159, | |
| "grad_norm": 0.5577658414840698, | |
| "learning_rate": 7.364648862904593e-06, | |
| "loss": 0.6552213430404663, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.8201135442011354, | |
| "grad_norm": 0.5350478887557983, | |
| "learning_rate": 7.35085758496494e-06, | |
| "loss": 0.5756250023841858, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 1.821411192214112, | |
| "grad_norm": 0.5247483849525452, | |
| "learning_rate": 7.337071723851018e-06, | |
| "loss": 0.5872269868850708, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.8227088402270883, | |
| "grad_norm": 0.5715752840042114, | |
| "learning_rate": 7.323291307751418e-06, | |
| "loss": 0.6395775079727173, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 1.8240064882400648, | |
| "grad_norm": 0.5355315208435059, | |
| "learning_rate": 7.3095163648436115e-06, | |
| "loss": 0.5502926707267761, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.8253041362530413, | |
| "grad_norm": 0.5468769073486328, | |
| "learning_rate": 7.295746923293865e-06, | |
| "loss": 0.6266253590583801, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 1.8266017842660178, | |
| "grad_norm": 0.5183525681495667, | |
| "learning_rate": 7.2819830112572035e-06, | |
| "loss": 0.5890312194824219, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.8278994322789943, | |
| "grad_norm": 0.5416871905326843, | |
| "learning_rate": 7.268224656877339e-06, | |
| "loss": 0.6163492798805237, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 1.8291970802919708, | |
| "grad_norm": 0.5376898646354675, | |
| "learning_rate": 7.25447188828663e-06, | |
| "loss": 0.6440437436103821, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.8304947283049473, | |
| "grad_norm": 0.5264099836349487, | |
| "learning_rate": 7.240724733606002e-06, | |
| "loss": 0.6445986032485962, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 1.8317923763179238, | |
| "grad_norm": 0.5397512912750244, | |
| "learning_rate": 7.2269832209449145e-06, | |
| "loss": 0.5767061710357666, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.8330900243309003, | |
| "grad_norm": 0.5331466794013977, | |
| "learning_rate": 7.213247378401274e-06, | |
| "loss": 0.6515385508537292, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 1.8343876723438766, | |
| "grad_norm": 0.5380875468254089, | |
| "learning_rate": 7.199517234061408e-06, | |
| "loss": 0.5956803560256958, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.8356853203568533, | |
| "grad_norm": 0.5553707480430603, | |
| "learning_rate": 7.1857928159999814e-06, | |
| "loss": 0.5990528464317322, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 1.8369829683698295, | |
| "grad_norm": 0.5348111391067505, | |
| "learning_rate": 7.172074152279963e-06, | |
| "loss": 0.5816199779510498, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.8382806163828063, | |
| "grad_norm": 0.63777756690979, | |
| "learning_rate": 7.1583612709525405e-06, | |
| "loss": 0.6647042036056519, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 1.8395782643957825, | |
| "grad_norm": 0.5394327640533447, | |
| "learning_rate": 7.14465420005709e-06, | |
| "loss": 0.629410982131958, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.8408759124087593, | |
| "grad_norm": 0.5467361807823181, | |
| "learning_rate": 7.130952967621096e-06, | |
| "loss": 0.5931155681610107, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 1.8421735604217355, | |
| "grad_norm": 0.5642380714416504, | |
| "learning_rate": 7.11725760166012e-06, | |
| "loss": 0.59910649061203, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.8434712084347122, | |
| "grad_norm": 0.5448968410491943, | |
| "learning_rate": 7.103568130177713e-06, | |
| "loss": 0.5758746862411499, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 1.8447688564476885, | |
| "grad_norm": 0.5109772682189941, | |
| "learning_rate": 7.089884581165382e-06, | |
| "loss": 0.5374370217323303, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.846066504460665, | |
| "grad_norm": 0.5496018528938293, | |
| "learning_rate": 7.076206982602516e-06, | |
| "loss": 0.6080317497253418, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 1.8473641524736415, | |
| "grad_norm": 0.5525946021080017, | |
| "learning_rate": 7.06253536245635e-06, | |
| "loss": 0.6326315402984619, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.848661800486618, | |
| "grad_norm": 0.5555429458618164, | |
| "learning_rate": 7.048869748681879e-06, | |
| "loss": 0.6499879360198975, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 1.8499594484995945, | |
| "grad_norm": 0.5364986062049866, | |
| "learning_rate": 7.035210169221834e-06, | |
| "loss": 0.6402702331542969, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.851257096512571, | |
| "grad_norm": 0.5398283004760742, | |
| "learning_rate": 7.021556652006588e-06, | |
| "loss": 0.636422872543335, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 1.8525547445255475, | |
| "grad_norm": 0.5333319306373596, | |
| "learning_rate": 7.007909224954135e-06, | |
| "loss": 0.6210685968399048, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.853852392538524, | |
| "grad_norm": 0.5136668086051941, | |
| "learning_rate": 6.994267915970003e-06, | |
| "loss": 0.5984174013137817, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 1.8551500405515005, | |
| "grad_norm": 0.5352861285209656, | |
| "learning_rate": 6.980632752947221e-06, | |
| "loss": 0.6331675052642822, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.8564476885644767, | |
| "grad_norm": 0.5386180281639099, | |
| "learning_rate": 6.967003763766247e-06, | |
| "loss": 0.599821925163269, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 1.8577453365774534, | |
| "grad_norm": 0.5548969507217407, | |
| "learning_rate": 6.953380976294907e-06, | |
| "loss": 0.6447435617446899, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.8590429845904297, | |
| "grad_norm": 0.5061814188957214, | |
| "learning_rate": 6.9397644183883616e-06, | |
| "loss": 0.6045181751251221, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 1.8603406326034064, | |
| "grad_norm": 0.49961408972740173, | |
| "learning_rate": 6.926154117889022e-06, | |
| "loss": 0.5710508823394775, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.8616382806163827, | |
| "grad_norm": 0.5761319398880005, | |
| "learning_rate": 6.91255010262651e-06, | |
| "loss": 0.6047182679176331, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 1.8629359286293594, | |
| "grad_norm": 0.5302688479423523, | |
| "learning_rate": 6.898952400417587e-06, | |
| "loss": 0.5881869792938232, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.8642335766423357, | |
| "grad_norm": 0.567452609539032, | |
| "learning_rate": 6.885361039066121e-06, | |
| "loss": 0.6580846905708313, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 1.8655312246553124, | |
| "grad_norm": 0.5567494034767151, | |
| "learning_rate": 6.8717760463629965e-06, | |
| "loss": 0.6213802099227905, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.8668288726682887, | |
| "grad_norm": 0.535961925983429, | |
| "learning_rate": 6.858197450086097e-06, | |
| "loss": 0.6174903512001038, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 1.8681265206812652, | |
| "grad_norm": 0.5607694387435913, | |
| "learning_rate": 6.844625278000205e-06, | |
| "loss": 0.658057451248169, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.8694241686942417, | |
| "grad_norm": 0.5164813995361328, | |
| "learning_rate": 6.831059557856984e-06, | |
| "loss": 0.6188488602638245, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 1.8707218167072182, | |
| "grad_norm": 0.5046887397766113, | |
| "learning_rate": 6.81750031739489e-06, | |
| "loss": 0.5495269298553467, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.8720194647201946, | |
| "grad_norm": 0.5218680500984192, | |
| "learning_rate": 6.803947584339148e-06, | |
| "loss": 0.5858875513076782, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 1.8733171127331711, | |
| "grad_norm": 0.5279871225357056, | |
| "learning_rate": 6.79040138640166e-06, | |
| "loss": 0.5829395055770874, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.8746147607461476, | |
| "grad_norm": 0.5364516377449036, | |
| "learning_rate": 6.7768617512809745e-06, | |
| "loss": 0.6135284900665283, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 1.8759124087591241, | |
| "grad_norm": 0.5465746521949768, | |
| "learning_rate": 6.763328706662214e-06, | |
| "loss": 0.5970785617828369, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.8772100567721006, | |
| "grad_norm": 0.5328618288040161, | |
| "learning_rate": 6.749802280217037e-06, | |
| "loss": 0.6004316806793213, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 1.878507704785077, | |
| "grad_norm": 0.5282012224197388, | |
| "learning_rate": 6.7362824996035545e-06, | |
| "loss": 0.5903221368789673, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.8798053527980536, | |
| "grad_norm": 0.5416566133499146, | |
| "learning_rate": 6.722769392466304e-06, | |
| "loss": 0.624277651309967, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 1.8811030008110299, | |
| "grad_norm": 0.5569058060646057, | |
| "learning_rate": 6.709262986436162e-06, | |
| "loss": 0.6214337348937988, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.8824006488240066, | |
| "grad_norm": 0.5567551255226135, | |
| "learning_rate": 6.695763309130318e-06, | |
| "loss": 0.5963641405105591, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 1.8836982968369829, | |
| "grad_norm": 0.5245199203491211, | |
| "learning_rate": 6.682270388152185e-06, | |
| "loss": 0.5722153186798096, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.8849959448499596, | |
| "grad_norm": 0.5476487874984741, | |
| "learning_rate": 6.668784251091381e-06, | |
| "loss": 0.573593258857727, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 1.8862935928629359, | |
| "grad_norm": 0.5254029631614685, | |
| "learning_rate": 6.655304925523635e-06, | |
| "loss": 0.5607786774635315, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.8875912408759126, | |
| "grad_norm": 0.5431527495384216, | |
| "learning_rate": 6.641832439010765e-06, | |
| "loss": 0.5841714143753052, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 1.8888888888888888, | |
| "grad_norm": 0.5374141931533813, | |
| "learning_rate": 6.628366819100586e-06, | |
| "loss": 0.5811495780944824, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.8901865369018653, | |
| "grad_norm": 0.5369722247123718, | |
| "learning_rate": 6.614908093326891e-06, | |
| "loss": 0.6311888694763184, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 1.8914841849148418, | |
| "grad_norm": 0.5656461119651794, | |
| "learning_rate": 6.601456289209362e-06, | |
| "loss": 0.6515893936157227, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.8927818329278183, | |
| "grad_norm": 0.5076130032539368, | |
| "learning_rate": 6.588011434253534e-06, | |
| "loss": 0.5477322340011597, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 1.8940794809407948, | |
| "grad_norm": 0.5373955965042114, | |
| "learning_rate": 6.574573555950738e-06, | |
| "loss": 0.5668719410896301, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.8953771289537713, | |
| "grad_norm": 0.5303026437759399, | |
| "learning_rate": 6.561142681778027e-06, | |
| "loss": 0.5856397747993469, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 1.8966747769667478, | |
| "grad_norm": 0.5287466049194336, | |
| "learning_rate": 6.547718839198145e-06, | |
| "loss": 0.574636697769165, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.8979724249797243, | |
| "grad_norm": 0.546556830406189, | |
| "learning_rate": 6.53430205565945e-06, | |
| "loss": 0.6119240522384644, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 1.8992700729927008, | |
| "grad_norm": 0.5332784652709961, | |
| "learning_rate": 6.520892358595869e-06, | |
| "loss": 0.6177451014518738, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.900567721005677, | |
| "grad_norm": 0.5086203217506409, | |
| "learning_rate": 6.507489775426834e-06, | |
| "loss": 0.6066810488700867, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 1.9018653690186538, | |
| "grad_norm": 0.5467303991317749, | |
| "learning_rate": 6.494094333557243e-06, | |
| "loss": 0.5971111059188843, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.90316301703163, | |
| "grad_norm": 0.5070620179176331, | |
| "learning_rate": 6.4807060603773795e-06, | |
| "loss": 0.6063017845153809, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 1.9044606650446068, | |
| "grad_norm": 0.553736686706543, | |
| "learning_rate": 6.467324983262877e-06, | |
| "loss": 0.579677402973175, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.905758313057583, | |
| "grad_norm": 0.5139430165290833, | |
| "learning_rate": 6.453951129574644e-06, | |
| "loss": 0.5715341567993164, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 1.9070559610705597, | |
| "grad_norm": 0.5478905439376831, | |
| "learning_rate": 6.4405845266588356e-06, | |
| "loss": 0.6066344976425171, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.908353609083536, | |
| "grad_norm": 0.5382056832313538, | |
| "learning_rate": 6.427225201846763e-06, | |
| "loss": 0.5792092084884644, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 1.9096512570965127, | |
| "grad_norm": 0.5592162013053894, | |
| "learning_rate": 6.413873182454873e-06, | |
| "loss": 0.6224773526191711, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.910948905109489, | |
| "grad_norm": 0.5435997843742371, | |
| "learning_rate": 6.4005284957846546e-06, | |
| "loss": 0.5740009546279907, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 1.9122465531224655, | |
| "grad_norm": 0.5480201840400696, | |
| "learning_rate": 6.3871911691226276e-06, | |
| "loss": 0.5897870063781738, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.913544201135442, | |
| "grad_norm": 0.5461702942848206, | |
| "learning_rate": 6.373861229740237e-06, | |
| "loss": 0.6223511695861816, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.9148418491484185, | |
| "grad_norm": 0.5337714552879333, | |
| "learning_rate": 6.360538704893845e-06, | |
| "loss": 0.5608541369438171, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.916139497161395, | |
| "grad_norm": 0.5573077201843262, | |
| "learning_rate": 6.3472236218246366e-06, | |
| "loss": 0.6532754302024841, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 1.9174371451743715, | |
| "grad_norm": 0.5389246940612793, | |
| "learning_rate": 6.333916007758591e-06, | |
| "loss": 0.5982533693313599, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.918734793187348, | |
| "grad_norm": 0.5433958768844604, | |
| "learning_rate": 6.320615889906403e-06, | |
| "loss": 0.592591404914856, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 1.9200324412003245, | |
| "grad_norm": 0.5413274765014648, | |
| "learning_rate": 6.307323295463457e-06, | |
| "loss": 0.6429393291473389, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.921330089213301, | |
| "grad_norm": 0.5350672602653503, | |
| "learning_rate": 6.294038251609738e-06, | |
| "loss": 0.5930889844894409, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 1.9226277372262772, | |
| "grad_norm": 0.5042331218719482, | |
| "learning_rate": 6.280760785509802e-06, | |
| "loss": 0.5509825944900513, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.923925385239254, | |
| "grad_norm": 0.5447627902030945, | |
| "learning_rate": 6.2674909243127e-06, | |
| "loss": 0.6052374839782715, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 1.9252230332522302, | |
| "grad_norm": 0.5395492911338806, | |
| "learning_rate": 6.254228695151949e-06, | |
| "loss": 0.6406330466270447, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.926520681265207, | |
| "grad_norm": 0.5140017867088318, | |
| "learning_rate": 6.240974125145443e-06, | |
| "loss": 0.5923643112182617, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 1.9278183292781832, | |
| "grad_norm": 0.5255963802337646, | |
| "learning_rate": 6.227727241395429e-06, | |
| "loss": 0.612221360206604, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.92911597729116, | |
| "grad_norm": 0.5396282076835632, | |
| "learning_rate": 6.214488070988424e-06, | |
| "loss": 0.5972959399223328, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 1.9304136253041362, | |
| "grad_norm": 0.5345456004142761, | |
| "learning_rate": 6.201256640995184e-06, | |
| "loss": 0.5695825815200806, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.931711273317113, | |
| "grad_norm": 0.5186867713928223, | |
| "learning_rate": 6.188032978470639e-06, | |
| "loss": 0.6117428541183472, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 1.9330089213300892, | |
| "grad_norm": 0.5213980674743652, | |
| "learning_rate": 6.174817110453828e-06, | |
| "loss": 0.584017276763916, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.9343065693430657, | |
| "grad_norm": 0.541926920413971, | |
| "learning_rate": 6.161609063967857e-06, | |
| "loss": 0.6257720589637756, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 1.9356042173560422, | |
| "grad_norm": 0.5566191673278809, | |
| "learning_rate": 6.1484088660198325e-06, | |
| "loss": 0.6734557151794434, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.9369018653690186, | |
| "grad_norm": 0.5532911419868469, | |
| "learning_rate": 6.135216543600828e-06, | |
| "loss": 0.5978685021400452, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 1.9381995133819951, | |
| "grad_norm": 0.5523790717124939, | |
| "learning_rate": 6.1220321236857974e-06, | |
| "loss": 0.6684085130691528, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.9394971613949716, | |
| "grad_norm": 0.5317186713218689, | |
| "learning_rate": 6.108855633233546e-06, | |
| "loss": 0.5903822183609009, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 1.9407948094079481, | |
| "grad_norm": 0.52325439453125, | |
| "learning_rate": 6.0956870991866545e-06, | |
| "loss": 0.5855342149734497, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.9420924574209246, | |
| "grad_norm": 0.5201572775840759, | |
| "learning_rate": 6.0825265484714526e-06, | |
| "loss": 0.5801212787628174, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 1.9433901054339011, | |
| "grad_norm": 0.5488981008529663, | |
| "learning_rate": 6.0693740079979235e-06, | |
| "loss": 0.647799015045166, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.9446877534468774, | |
| "grad_norm": 0.49936795234680176, | |
| "learning_rate": 6.056229504659696e-06, | |
| "loss": 0.5507512092590332, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 1.945985401459854, | |
| "grad_norm": 0.5403010249137878, | |
| "learning_rate": 6.043093065333945e-06, | |
| "loss": 0.5773292779922485, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.9472830494728304, | |
| "grad_norm": 0.532992422580719, | |
| "learning_rate": 6.029964716881367e-06, | |
| "loss": 0.561974048614502, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 1.948580697485807, | |
| "grad_norm": 0.5226876139640808, | |
| "learning_rate": 6.016844486146106e-06, | |
| "loss": 0.6117234230041504, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.9498783454987834, | |
| "grad_norm": 0.5627997517585754, | |
| "learning_rate": 6.003732399955722e-06, | |
| "loss": 0.5736496448516846, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 1.95117599351176, | |
| "grad_norm": 0.5260640382766724, | |
| "learning_rate": 5.990628485121106e-06, | |
| "loss": 0.5524093508720398, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.9524736415247363, | |
| "grad_norm": 0.5555213689804077, | |
| "learning_rate": 5.97753276843645e-06, | |
| "loss": 0.6590294241905212, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 1.9537712895377128, | |
| "grad_norm": 0.5117315053939819, | |
| "learning_rate": 5.964445276679176e-06, | |
| "loss": 0.5593676567077637, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.9550689375506893, | |
| "grad_norm": 0.5474593043327332, | |
| "learning_rate": 5.9513660366099005e-06, | |
| "loss": 0.5995163321495056, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 1.9563665855636658, | |
| "grad_norm": 0.5376996397972107, | |
| "learning_rate": 5.93829507497235e-06, | |
| "loss": 0.5445429086685181, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.9576642335766423, | |
| "grad_norm": 0.539804220199585, | |
| "learning_rate": 5.925232418493338e-06, | |
| "loss": 0.6023607850074768, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 1.9589618815896188, | |
| "grad_norm": 0.5308881402015686, | |
| "learning_rate": 5.912178093882688e-06, | |
| "loss": 0.5908794403076172, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.9602595296025953, | |
| "grad_norm": 0.5358856320381165, | |
| "learning_rate": 5.8991321278331934e-06, | |
| "loss": 0.5432258248329163, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 1.9615571776155718, | |
| "grad_norm": 0.5521926879882812, | |
| "learning_rate": 5.8860945470205466e-06, | |
| "loss": 0.6700773239135742, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.9628548256285483, | |
| "grad_norm": 0.5567953586578369, | |
| "learning_rate": 5.8730653781033085e-06, | |
| "loss": 0.6132399439811707, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 1.9641524736415248, | |
| "grad_norm": 0.5308123826980591, | |
| "learning_rate": 5.860044647722827e-06, | |
| "loss": 0.595048189163208, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.9654501216545013, | |
| "grad_norm": 0.5229505896568298, | |
| "learning_rate": 5.847032382503202e-06, | |
| "loss": 0.5752079486846924, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 1.9667477696674776, | |
| "grad_norm": 0.5336843729019165, | |
| "learning_rate": 5.834028609051218e-06, | |
| "loss": 0.6190193891525269, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.9680454176804543, | |
| "grad_norm": 0.5378988981246948, | |
| "learning_rate": 5.8210333539563e-06, | |
| "loss": 0.5807895660400391, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 1.9693430656934305, | |
| "grad_norm": 0.5520551800727844, | |
| "learning_rate": 5.808046643790468e-06, | |
| "loss": 0.6308130621910095, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.9706407137064073, | |
| "grad_norm": 0.5014427900314331, | |
| "learning_rate": 5.795068505108243e-06, | |
| "loss": 0.584097146987915, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 1.9719383617193835, | |
| "grad_norm": 0.5326021313667297, | |
| "learning_rate": 5.782098964446641e-06, | |
| "loss": 0.5909327268600464, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.9732360097323602, | |
| "grad_norm": 0.5124540328979492, | |
| "learning_rate": 5.769138048325087e-06, | |
| "loss": 0.5518309473991394, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 1.9745336577453365, | |
| "grad_norm": 0.5387500524520874, | |
| "learning_rate": 5.756185783245376e-06, | |
| "loss": 0.5835770964622498, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.975831305758313, | |
| "grad_norm": 0.568587064743042, | |
| "learning_rate": 5.743242195691612e-06, | |
| "loss": 0.5821942687034607, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 1.9771289537712895, | |
| "grad_norm": 0.5374230742454529, | |
| "learning_rate": 5.730307312130152e-06, | |
| "loss": 0.6571119427680969, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.978426601784266, | |
| "grad_norm": 0.5388919115066528, | |
| "learning_rate": 5.717381159009563e-06, | |
| "loss": 0.5895075798034668, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 1.9797242497972425, | |
| "grad_norm": 0.5499215722084045, | |
| "learning_rate": 5.704463762760559e-06, | |
| "loss": 0.61728835105896, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.981021897810219, | |
| "grad_norm": 0.5375927686691284, | |
| "learning_rate": 5.691555149795933e-06, | |
| "loss": 0.6732977032661438, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 1.9823195458231955, | |
| "grad_norm": 0.5313878655433655, | |
| "learning_rate": 5.678655346510549e-06, | |
| "loss": 0.61357581615448, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.983617193836172, | |
| "grad_norm": 0.5222123265266418, | |
| "learning_rate": 5.6657643792812265e-06, | |
| "loss": 0.5704218745231628, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 1.9849148418491485, | |
| "grad_norm": 0.5498616099357605, | |
| "learning_rate": 5.652882274466736e-06, | |
| "loss": 0.6428430080413818, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.986212489862125, | |
| "grad_norm": 0.5288700461387634, | |
| "learning_rate": 5.640009058407719e-06, | |
| "loss": 0.5776660442352295, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 1.9875101378751014, | |
| "grad_norm": 0.5719195008277893, | |
| "learning_rate": 5.627144757426647e-06, | |
| "loss": 0.6659935116767883, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.9888077858880777, | |
| "grad_norm": 0.5699102282524109, | |
| "learning_rate": 5.614289397827757e-06, | |
| "loss": 0.649441123008728, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 1.9901054339010544, | |
| "grad_norm": 0.5806236267089844, | |
| "learning_rate": 5.601443005897012e-06, | |
| "loss": 0.6462723016738892, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.9914030819140307, | |
| "grad_norm": 0.5485842823982239, | |
| "learning_rate": 5.588605607902017e-06, | |
| "loss": 0.6063494086265564, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 1.9927007299270074, | |
| "grad_norm": 0.5317525863647461, | |
| "learning_rate": 5.57577723009202e-06, | |
| "loss": 0.5641921162605286, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.9939983779399837, | |
| "grad_norm": 0.5366416573524475, | |
| "learning_rate": 5.5629578986977894e-06, | |
| "loss": 0.623965322971344, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 1.9952960259529604, | |
| "grad_norm": 0.5662190318107605, | |
| "learning_rate": 5.550147639931631e-06, | |
| "loss": 0.6340383291244507, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.9965936739659367, | |
| "grad_norm": 0.5266711711883545, | |
| "learning_rate": 5.537346479987269e-06, | |
| "loss": 0.6086807250976562, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 1.9978913219789132, | |
| "grad_norm": 0.5435559153556824, | |
| "learning_rate": 5.524554445039838e-06, | |
| "loss": 0.640510082244873, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.9991889699918897, | |
| "grad_norm": 0.5433489084243774, | |
| "learning_rate": 5.511771561245813e-06, | |
| "loss": 0.5800854563713074, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.6513635516166687, | |
| "learning_rate": 5.498997854742956e-06, | |
| "loss": 0.546117901802063, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.0012976480129763, | |
| "grad_norm": 0.7124117016792297, | |
| "learning_rate": 5.4862333516502634e-06, | |
| "loss": 0.5231295824050903, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 2.002595296025953, | |
| "grad_norm": 0.727088451385498, | |
| "learning_rate": 5.473478078067913e-06, | |
| "loss": 0.5810973644256592, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.0038929440389293, | |
| "grad_norm": 0.6788406372070312, | |
| "learning_rate": 5.460732060077212e-06, | |
| "loss": 0.47124871611595154, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 2.005190592051906, | |
| "grad_norm": 0.6010527610778809, | |
| "learning_rate": 5.44799532374054e-06, | |
| "loss": 0.5422745943069458, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.0064882400648822, | |
| "grad_norm": 0.609658420085907, | |
| "learning_rate": 5.435267895101303e-06, | |
| "loss": 0.48424142599105835, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 2.007785888077859, | |
| "grad_norm": 0.5703460574150085, | |
| "learning_rate": 5.422549800183861e-06, | |
| "loss": 0.5136675834655762, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.0090835360908352, | |
| "grad_norm": 0.5782158970832825, | |
| "learning_rate": 5.409841064993512e-06, | |
| "loss": 0.509381890296936, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 2.010381184103812, | |
| "grad_norm": 0.6222527623176575, | |
| "learning_rate": 5.39714171551639e-06, | |
| "loss": 0.4843388795852661, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.011678832116788, | |
| "grad_norm": 0.7037692666053772, | |
| "learning_rate": 5.384451777719464e-06, | |
| "loss": 0.5681462287902832, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 2.012976480129765, | |
| "grad_norm": 0.7455988526344299, | |
| "learning_rate": 5.371771277550432e-06, | |
| "loss": 0.551672101020813, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.014274128142741, | |
| "grad_norm": 0.7268160581588745, | |
| "learning_rate": 5.359100240937717e-06, | |
| "loss": 0.5382372140884399, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 2.015571776155718, | |
| "grad_norm": 0.6356255412101746, | |
| "learning_rate": 5.3464386937903764e-06, | |
| "loss": 0.5280675888061523, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.016869424168694, | |
| "grad_norm": 0.5975467562675476, | |
| "learning_rate": 5.33378666199807e-06, | |
| "loss": 0.47013112902641296, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 2.018167072181671, | |
| "grad_norm": 0.6236818432807922, | |
| "learning_rate": 5.321144171431003e-06, | |
| "loss": 0.4888884425163269, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.019464720194647, | |
| "grad_norm": 0.6166471838951111, | |
| "learning_rate": 5.308511247939872e-06, | |
| "loss": 0.5211419463157654, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 2.020762368207624, | |
| "grad_norm": 0.6095893383026123, | |
| "learning_rate": 5.295887917355794e-06, | |
| "loss": 0.5085535049438477, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.0220600162206, | |
| "grad_norm": 0.6039384007453918, | |
| "learning_rate": 5.283274205490303e-06, | |
| "loss": 0.4754714369773865, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 2.0233576642335764, | |
| "grad_norm": 0.6331435441970825, | |
| "learning_rate": 5.270670138135234e-06, | |
| "loss": 0.5521947145462036, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.024655312246553, | |
| "grad_norm": 0.6151823997497559, | |
| "learning_rate": 5.25807574106272e-06, | |
| "loss": 0.5278744697570801, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 2.0259529602595294, | |
| "grad_norm": 0.5749709606170654, | |
| "learning_rate": 5.245491040025115e-06, | |
| "loss": 0.4914984107017517, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.027250608272506, | |
| "grad_norm": 0.5855306386947632, | |
| "learning_rate": 5.232916060754947e-06, | |
| "loss": 0.5195509195327759, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 2.0285482562854824, | |
| "grad_norm": 0.5908445119857788, | |
| "learning_rate": 5.220350828964865e-06, | |
| "loss": 0.48390451073646545, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.029845904298459, | |
| "grad_norm": 0.5874761343002319, | |
| "learning_rate": 5.207795370347588e-06, | |
| "loss": 0.5324580669403076, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 2.0311435523114354, | |
| "grad_norm": 0.5893219709396362, | |
| "learning_rate": 5.195249710575853e-06, | |
| "loss": 0.5100334286689758, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.032441200324412, | |
| "grad_norm": 0.5876151919364929, | |
| "learning_rate": 5.182713875302361e-06, | |
| "loss": 0.4768049716949463, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 2.0337388483373884, | |
| "grad_norm": 0.6265038251876831, | |
| "learning_rate": 5.1701878901597106e-06, | |
| "loss": 0.5602673292160034, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.035036496350365, | |
| "grad_norm": 0.5975306034088135, | |
| "learning_rate": 5.157671780760385e-06, | |
| "loss": 0.5052694082260132, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 2.0363341443633414, | |
| "grad_norm": 0.5611022114753723, | |
| "learning_rate": 5.145165572696652e-06, | |
| "loss": 0.49101999402046204, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.037631792376318, | |
| "grad_norm": 0.5829542875289917, | |
| "learning_rate": 5.132669291540544e-06, | |
| "loss": 0.474854052066803, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 2.0389294403892944, | |
| "grad_norm": 0.5918568968772888, | |
| "learning_rate": 5.1201829628437926e-06, | |
| "loss": 0.4853309988975525, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.040227088402271, | |
| "grad_norm": 0.5785784125328064, | |
| "learning_rate": 5.107706612137776e-06, | |
| "loss": 0.5171955227851868, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 2.0415247364152473, | |
| "grad_norm": 0.5528171062469482, | |
| "learning_rate": 5.095240264933486e-06, | |
| "loss": 0.47794681787490845, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.042822384428224, | |
| "grad_norm": 0.5567626357078552, | |
| "learning_rate": 5.082783946721434e-06, | |
| "loss": 0.4940184950828552, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 2.0441200324412003, | |
| "grad_norm": 0.5630913376808167, | |
| "learning_rate": 5.070337682971642e-06, | |
| "loss": 0.5437344312667847, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.0454176804541766, | |
| "grad_norm": 0.5575384497642517, | |
| "learning_rate": 5.057901499133573e-06, | |
| "loss": 0.49236786365509033, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 2.0467153284671533, | |
| "grad_norm": 0.5638654828071594, | |
| "learning_rate": 5.0454754206360705e-06, | |
| "loss": 0.4736412465572357, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.0480129764801296, | |
| "grad_norm": 0.5577630996704102, | |
| "learning_rate": 5.033059472887322e-06, | |
| "loss": 0.5147624015808105, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 2.0493106244931063, | |
| "grad_norm": 0.5717137455940247, | |
| "learning_rate": 5.0206536812748004e-06, | |
| "loss": 0.4905228614807129, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.0506082725060826, | |
| "grad_norm": 0.5646504759788513, | |
| "learning_rate": 5.008258071165202e-06, | |
| "loss": 0.5036407113075256, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 2.0519059205190593, | |
| "grad_norm": 0.5792942047119141, | |
| "learning_rate": 4.995872667904424e-06, | |
| "loss": 0.5340180993080139, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.0532035685320356, | |
| "grad_norm": 0.573951244354248, | |
| "learning_rate": 4.98349749681747e-06, | |
| "loss": 0.4675467610359192, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 2.0545012165450123, | |
| "grad_norm": 0.5502886772155762, | |
| "learning_rate": 4.971132583208438e-06, | |
| "loss": 0.4816184937953949, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.0557988645579885, | |
| "grad_norm": 0.5748745203018188, | |
| "learning_rate": 4.958777952360445e-06, | |
| "loss": 0.49751102924346924, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 2.0570965125709653, | |
| "grad_norm": 0.593724250793457, | |
| "learning_rate": 4.946433629535585e-06, | |
| "loss": 0.48918506503105164, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.0583941605839415, | |
| "grad_norm": 0.5852590799331665, | |
| "learning_rate": 4.934099639974874e-06, | |
| "loss": 0.5142393708229065, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 2.0596918085969182, | |
| "grad_norm": 0.5500675439834595, | |
| "learning_rate": 4.921776008898198e-06, | |
| "loss": 0.43804582953453064, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.0609894566098945, | |
| "grad_norm": 0.572162389755249, | |
| "learning_rate": 4.909462761504264e-06, | |
| "loss": 0.5290922522544861, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 2.0622871046228712, | |
| "grad_norm": 0.5475997924804688, | |
| "learning_rate": 4.897159922970551e-06, | |
| "loss": 0.489504873752594, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.0635847526358475, | |
| "grad_norm": 0.5753741264343262, | |
| "learning_rate": 4.884867518453238e-06, | |
| "loss": 0.5394560694694519, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 2.0648824006488242, | |
| "grad_norm": 0.5752173662185669, | |
| "learning_rate": 4.872585573087195e-06, | |
| "loss": 0.5700497627258301, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.0661800486618005, | |
| "grad_norm": 0.5844142436981201, | |
| "learning_rate": 4.860314111985881e-06, | |
| "loss": 0.5502715110778809, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 2.0674776966747768, | |
| "grad_norm": 0.5586737990379333, | |
| "learning_rate": 4.848053160241333e-06, | |
| "loss": 0.48312538862228394, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.0687753446877535, | |
| "grad_norm": 0.5547072887420654, | |
| "learning_rate": 4.835802742924091e-06, | |
| "loss": 0.4890977442264557, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 2.0700729927007298, | |
| "grad_norm": 0.5696388483047485, | |
| "learning_rate": 4.823562885083161e-06, | |
| "loss": 0.5179868936538696, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.0713706407137065, | |
| "grad_norm": 0.5792607069015503, | |
| "learning_rate": 4.811333611745953e-06, | |
| "loss": 0.5098393559455872, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 2.0726682887266827, | |
| "grad_norm": 0.5769554972648621, | |
| "learning_rate": 4.799114947918238e-06, | |
| "loss": 0.4976171553134918, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.0739659367396595, | |
| "grad_norm": 0.6067489981651306, | |
| "learning_rate": 4.786906918584083e-06, | |
| "loss": 0.5139312148094177, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 2.0752635847526357, | |
| "grad_norm": 0.5910279750823975, | |
| "learning_rate": 4.774709548705831e-06, | |
| "loss": 0.5157588720321655, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.0765612327656124, | |
| "grad_norm": 0.5831329226493835, | |
| "learning_rate": 4.762522863224001e-06, | |
| "loss": 0.5141895413398743, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 2.0778588807785887, | |
| "grad_norm": 0.5735464692115784, | |
| "learning_rate": 4.750346887057292e-06, | |
| "loss": 0.47724485397338867, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.0791565287915654, | |
| "grad_norm": 0.5806788206100464, | |
| "learning_rate": 4.738181645102493e-06, | |
| "loss": 0.4755935072898865, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 2.0804541768045417, | |
| "grad_norm": 0.5973532199859619, | |
| "learning_rate": 4.726027162234434e-06, | |
| "loss": 0.5464816093444824, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.0817518248175184, | |
| "grad_norm": 0.5893049240112305, | |
| "learning_rate": 4.713883463305972e-06, | |
| "loss": 0.5293697118759155, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 2.0830494728304947, | |
| "grad_norm": 0.5956568717956543, | |
| "learning_rate": 4.701750573147885e-06, | |
| "loss": 0.5268076658248901, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.0843471208434714, | |
| "grad_norm": 0.5941202044487, | |
| "learning_rate": 4.689628516568866e-06, | |
| "loss": 0.526781439781189, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 2.0856447688564477, | |
| "grad_norm": 0.5724000334739685, | |
| "learning_rate": 4.677517318355455e-06, | |
| "loss": 0.5051593780517578, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.086942416869424, | |
| "grad_norm": 0.5567840933799744, | |
| "learning_rate": 4.6654170032719825e-06, | |
| "loss": 0.48566874861717224, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 2.0882400648824007, | |
| "grad_norm": 0.5653722882270813, | |
| "learning_rate": 4.6533275960605355e-06, | |
| "loss": 0.5071468353271484, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.0882400648824007, | |
| "eval_loss": 0.6963403820991516, | |
| "eval_runtime": 72.3826, | |
| "eval_samples_per_second": 71.73, | |
| "eval_steps_per_second": 8.966, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.089537712895377, | |
| "grad_norm": 0.5640507340431213, | |
| "learning_rate": 4.641249121440892e-06, | |
| "loss": 0.5107710361480713, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 2.0908353609083536, | |
| "grad_norm": 0.5841313004493713, | |
| "learning_rate": 4.629181604110464e-06, | |
| "loss": 0.5194936990737915, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.09213300892133, | |
| "grad_norm": 0.5427317023277283, | |
| "learning_rate": 4.617125068744288e-06, | |
| "loss": 0.44176995754241943, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 2.0934306569343066, | |
| "grad_norm": 0.6006700992584229, | |
| "learning_rate": 4.605079539994911e-06, | |
| "loss": 0.5314173102378845, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.094728304947283, | |
| "grad_norm": 0.5708412528038025, | |
| "learning_rate": 4.593045042492404e-06, | |
| "loss": 0.5313728451728821, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 2.0960259529602596, | |
| "grad_norm": 0.5850820541381836, | |
| "learning_rate": 4.581021600844258e-06, | |
| "loss": 0.4967271089553833, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.097323600973236, | |
| "grad_norm": 0.5869132280349731, | |
| "learning_rate": 4.569009239635374e-06, | |
| "loss": 0.5268970727920532, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 2.0986212489862126, | |
| "grad_norm": 0.5825201869010925, | |
| "learning_rate": 4.557007983427987e-06, | |
| "loss": 0.5315977334976196, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.099918896999189, | |
| "grad_norm": 0.5721443891525269, | |
| "learning_rate": 4.54501785676163e-06, | |
| "loss": 0.4732065498828888, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 2.1012165450121656, | |
| "grad_norm": 0.5872232913970947, | |
| "learning_rate": 4.533038884153077e-06, | |
| "loss": 0.5813014507293701, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.102514193025142, | |
| "grad_norm": 0.5751720666885376, | |
| "learning_rate": 4.521071090096298e-06, | |
| "loss": 0.4687768518924713, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 2.1038118410381186, | |
| "grad_norm": 0.5663445591926575, | |
| "learning_rate": 4.509114499062393e-06, | |
| "loss": 0.49182090163230896, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.105109489051095, | |
| "grad_norm": 0.5650926828384399, | |
| "learning_rate": 4.4971691354995795e-06, | |
| "loss": 0.5067583322525024, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 2.1064071370640716, | |
| "grad_norm": 0.6090897917747498, | |
| "learning_rate": 4.485235023833087e-06, | |
| "loss": 0.5684949159622192, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.107704785077048, | |
| "grad_norm": 0.6066005229949951, | |
| "learning_rate": 4.4733121884651665e-06, | |
| "loss": 0.5100910067558289, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 2.1090024330900246, | |
| "grad_norm": 0.5951321125030518, | |
| "learning_rate": 4.46140065377499e-06, | |
| "loss": 0.4774884283542633, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.110300081103001, | |
| "grad_norm": 0.5725848078727722, | |
| "learning_rate": 4.449500444118633e-06, | |
| "loss": 0.5018754005432129, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 2.111597729115977, | |
| "grad_norm": 0.5799410343170166, | |
| "learning_rate": 4.437611583829014e-06, | |
| "loss": 0.49752479791641235, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.112895377128954, | |
| "grad_norm": 0.5619634985923767, | |
| "learning_rate": 4.42573409721584e-06, | |
| "loss": 0.4756616950035095, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 2.11419302514193, | |
| "grad_norm": 0.5556355118751526, | |
| "learning_rate": 4.413868008565569e-06, | |
| "loss": 0.4895199239253998, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.115490673154907, | |
| "grad_norm": 0.5813250541687012, | |
| "learning_rate": 4.402013342141347e-06, | |
| "loss": 0.45987099409103394, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 2.116788321167883, | |
| "grad_norm": 0.5723846554756165, | |
| "learning_rate": 4.390170122182965e-06, | |
| "loss": 0.4845224916934967, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.11808596918086, | |
| "grad_norm": 0.5540896058082581, | |
| "learning_rate": 4.378338372906813e-06, | |
| "loss": 0.4948923587799072, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 2.119383617193836, | |
| "grad_norm": 0.61214679479599, | |
| "learning_rate": 4.3665181185058255e-06, | |
| "loss": 0.5314114093780518, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.1206812652068128, | |
| "grad_norm": 0.5635900497436523, | |
| "learning_rate": 4.354709383149421e-06, | |
| "loss": 0.4875974655151367, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 2.121978913219789, | |
| "grad_norm": 0.5833781957626343, | |
| "learning_rate": 4.342912190983487e-06, | |
| "loss": 0.5470179915428162, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.1232765612327658, | |
| "grad_norm": 0.5999435782432556, | |
| "learning_rate": 4.331126566130284e-06, | |
| "loss": 0.5479536056518555, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 2.124574209245742, | |
| "grad_norm": 0.589368999004364, | |
| "learning_rate": 4.319352532688444e-06, | |
| "loss": 0.5104061961174011, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.1258718572587187, | |
| "grad_norm": 0.5677252411842346, | |
| "learning_rate": 4.3075901147328745e-06, | |
| "loss": 0.5259417295455933, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 2.127169505271695, | |
| "grad_norm": 0.5625855326652527, | |
| "learning_rate": 4.295839336314749e-06, | |
| "loss": 0.49216002225875854, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.1284671532846717, | |
| "grad_norm": 0.5749784111976624, | |
| "learning_rate": 4.284100221461432e-06, | |
| "loss": 0.47341352701187134, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 2.129764801297648, | |
| "grad_norm": 0.5952023267745972, | |
| "learning_rate": 4.272372794176446e-06, | |
| "loss": 0.5849668979644775, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.1310624493106243, | |
| "grad_norm": 0.6117653250694275, | |
| "learning_rate": 4.260657078439409e-06, | |
| "loss": 0.5250235795974731, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 2.132360097323601, | |
| "grad_norm": 0.5717377662658691, | |
| "learning_rate": 4.248953098205997e-06, | |
| "loss": 0.49503540992736816, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.1336577453365773, | |
| "grad_norm": 0.5875842571258545, | |
| "learning_rate": 4.237260877407878e-06, | |
| "loss": 0.5329856872558594, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 2.134955393349554, | |
| "grad_norm": 0.5664336085319519, | |
| "learning_rate": 4.225580439952699e-06, | |
| "loss": 0.5302871465682983, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.1362530413625302, | |
| "grad_norm": 0.5786408185958862, | |
| "learning_rate": 4.213911809723987e-06, | |
| "loss": 0.49267759919166565, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 2.137550689375507, | |
| "grad_norm": 0.5607128143310547, | |
| "learning_rate": 4.20225501058114e-06, | |
| "loss": 0.5211464166641235, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.1388483373884832, | |
| "grad_norm": 0.5761646628379822, | |
| "learning_rate": 4.190610066359364e-06, | |
| "loss": 0.5178772211074829, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 2.14014598540146, | |
| "grad_norm": 0.5818209648132324, | |
| "learning_rate": 4.1789770008696205e-06, | |
| "loss": 0.5244809985160828, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.141443633414436, | |
| "grad_norm": 0.6208338141441345, | |
| "learning_rate": 4.167355837898585e-06, | |
| "loss": 0.5720170736312866, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 2.142741281427413, | |
| "grad_norm": 0.59494549036026, | |
| "learning_rate": 4.155746601208594e-06, | |
| "loss": 0.5233884453773499, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.144038929440389, | |
| "grad_norm": 0.5718002915382385, | |
| "learning_rate": 4.144149314537599e-06, | |
| "loss": 0.48552173376083374, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 2.145336577453366, | |
| "grad_norm": 0.5601415634155273, | |
| "learning_rate": 4.1325640015991185e-06, | |
| "loss": 0.4996642768383026, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.146634225466342, | |
| "grad_norm": 0.5795076489448547, | |
| "learning_rate": 4.120990686082174e-06, | |
| "loss": 0.5177854895591736, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 2.147931873479319, | |
| "grad_norm": 0.5665140151977539, | |
| "learning_rate": 4.109429391651283e-06, | |
| "loss": 0.46502965688705444, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.149229521492295, | |
| "grad_norm": 0.5985783934593201, | |
| "learning_rate": 4.097880141946354e-06, | |
| "loss": 0.4880366325378418, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 2.150527169505272, | |
| "grad_norm": 0.5875007510185242, | |
| "learning_rate": 4.08634296058268e-06, | |
| "loss": 0.4756428599357605, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.151824817518248, | |
| "grad_norm": 0.5694658160209656, | |
| "learning_rate": 4.074817871150887e-06, | |
| "loss": 0.5224863886833191, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 2.153122465531225, | |
| "grad_norm": 0.5686694979667664, | |
| "learning_rate": 4.063304897216856e-06, | |
| "loss": 0.4963817000389099, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.154420113544201, | |
| "grad_norm": 0.5916073322296143, | |
| "learning_rate": 4.051804062321706e-06, | |
| "loss": 0.5067265629768372, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 2.1557177615571774, | |
| "grad_norm": 0.5737749338150024, | |
| "learning_rate": 4.040315389981736e-06, | |
| "loss": 0.547669529914856, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.157015409570154, | |
| "grad_norm": 0.5631166696548462, | |
| "learning_rate": 4.028838903688372e-06, | |
| "loss": 0.5300416946411133, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 2.1583130575831304, | |
| "grad_norm": 0.5811983942985535, | |
| "learning_rate": 4.017374626908125e-06, | |
| "loss": 0.5100100040435791, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.159610705596107, | |
| "grad_norm": 0.571027934551239, | |
| "learning_rate": 4.005922583082538e-06, | |
| "loss": 0.5137525200843811, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 2.1609083536090834, | |
| "grad_norm": 0.5910731554031372, | |
| "learning_rate": 3.994482795628142e-06, | |
| "loss": 0.5244160890579224, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.16220600162206, | |
| "grad_norm": 0.5894386768341064, | |
| "learning_rate": 3.983055287936411e-06, | |
| "loss": 0.5517876148223877, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 2.1635036496350364, | |
| "grad_norm": 0.5779116153717041, | |
| "learning_rate": 3.971640083373696e-06, | |
| "loss": 0.5097295045852661, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.164801297648013, | |
| "grad_norm": 0.5987510085105896, | |
| "learning_rate": 3.960237205281213e-06, | |
| "loss": 0.511284589767456, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 2.1660989456609894, | |
| "grad_norm": 0.5853222608566284, | |
| "learning_rate": 3.948846676974953e-06, | |
| "loss": 0.5473302602767944, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.167396593673966, | |
| "grad_norm": 0.5716820359230042, | |
| "learning_rate": 3.937468521745666e-06, | |
| "loss": 0.4697805345058441, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 2.1686942416869424, | |
| "grad_norm": 0.5948668122291565, | |
| "learning_rate": 3.9261027628588e-06, | |
| "loss": 0.5532658100128174, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.169991889699919, | |
| "grad_norm": 0.5779493451118469, | |
| "learning_rate": 3.9147494235544544e-06, | |
| "loss": 0.495819091796875, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 2.1712895377128953, | |
| "grad_norm": 0.588945746421814, | |
| "learning_rate": 3.903408527047336e-06, | |
| "loss": 0.50020432472229, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.172587185725872, | |
| "grad_norm": 0.5889913439750671, | |
| "learning_rate": 3.892080096526707e-06, | |
| "loss": 0.5079851150512695, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 2.1738848337388483, | |
| "grad_norm": 0.5692569017410278, | |
| "learning_rate": 3.880764155156339e-06, | |
| "loss": 0.47483527660369873, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.1751824817518246, | |
| "grad_norm": 0.6015142202377319, | |
| "learning_rate": 3.8694607260744745e-06, | |
| "loss": 0.5588316321372986, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 2.1764801297648013, | |
| "grad_norm": 0.5825367569923401, | |
| "learning_rate": 3.858169832393752e-06, | |
| "loss": 0.5049576759338379, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.1777777777777776, | |
| "grad_norm": 0.6517031788825989, | |
| "learning_rate": 3.846891497201206e-06, | |
| "loss": 0.5698549151420593, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 2.1790754257907543, | |
| "grad_norm": 0.5972406268119812, | |
| "learning_rate": 3.835625743558168e-06, | |
| "loss": 0.5489758253097534, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.1803730738037306, | |
| "grad_norm": 0.590186595916748, | |
| "learning_rate": 3.824372594500256e-06, | |
| "loss": 0.5560799837112427, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 2.1816707218167073, | |
| "grad_norm": 0.6042253375053406, | |
| "learning_rate": 3.813132073037309e-06, | |
| "loss": 0.5188357830047607, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.1829683698296836, | |
| "grad_norm": 0.5862630605697632, | |
| "learning_rate": 3.8019042021533513e-06, | |
| "loss": 0.49817925691604614, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 2.1842660178426603, | |
| "grad_norm": 0.5700656175613403, | |
| "learning_rate": 3.7906890048065358e-06, | |
| "loss": 0.5223833322525024, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.1855636658556366, | |
| "grad_norm": 0.5849031805992126, | |
| "learning_rate": 3.779486503929106e-06, | |
| "loss": 0.5123599767684937, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 2.1868613138686133, | |
| "grad_norm": 0.5997171998023987, | |
| "learning_rate": 3.7682967224273317e-06, | |
| "loss": 0.5369530320167542, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.1881589618815895, | |
| "grad_norm": 0.5994778275489807, | |
| "learning_rate": 3.757119683181493e-06, | |
| "loss": 0.47989219427108765, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 2.1894566098945663, | |
| "grad_norm": 0.5771443247795105, | |
| "learning_rate": 3.7459554090458018e-06, | |
| "loss": 0.4408413767814636, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.1907542579075425, | |
| "grad_norm": 0.5725969672203064, | |
| "learning_rate": 3.7348039228483758e-06, | |
| "loss": 0.46296805143356323, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 2.1920519059205192, | |
| "grad_norm": 0.5743042826652527, | |
| "learning_rate": 3.7236652473911817e-06, | |
| "loss": 0.482837975025177, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.1933495539334955, | |
| "grad_norm": 0.5836053490638733, | |
| "learning_rate": 3.7125394054499843e-06, | |
| "loss": 0.5156795978546143, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 2.1946472019464722, | |
| "grad_norm": 0.5889219641685486, | |
| "learning_rate": 3.7014264197743267e-06, | |
| "loss": 0.5081969499588013, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.1959448499594485, | |
| "grad_norm": 0.6140073537826538, | |
| "learning_rate": 3.6903263130874423e-06, | |
| "loss": 0.5605005025863647, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 2.197242497972425, | |
| "grad_norm": 0.5697020292282104, | |
| "learning_rate": 3.679239108086241e-06, | |
| "loss": 0.5305500030517578, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.1985401459854015, | |
| "grad_norm": 0.5989742875099182, | |
| "learning_rate": 3.668164827441254e-06, | |
| "loss": 0.5370711088180542, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 2.1998377939983778, | |
| "grad_norm": 0.608519971370697, | |
| "learning_rate": 3.657103493796581e-06, | |
| "loss": 0.5120800137519836, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.2011354420113545, | |
| "grad_norm": 0.5787931084632874, | |
| "learning_rate": 3.6460551297698486e-06, | |
| "loss": 0.5016961693763733, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 2.2024330900243307, | |
| "grad_norm": 0.5809414982795715, | |
| "learning_rate": 3.6350197579521696e-06, | |
| "loss": 0.5177795886993408, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.2037307380373075, | |
| "grad_norm": 0.6027206778526306, | |
| "learning_rate": 3.6239974009080746e-06, | |
| "loss": 0.500653862953186, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 2.2050283860502837, | |
| "grad_norm": 0.5894326567649841, | |
| "learning_rate": 3.6129880811755093e-06, | |
| "loss": 0.5206901431083679, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.2063260340632604, | |
| "grad_norm": 0.591676652431488, | |
| "learning_rate": 3.601991821265731e-06, | |
| "loss": 0.49031156301498413, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 2.2076236820762367, | |
| "grad_norm": 0.567371666431427, | |
| "learning_rate": 3.591008643663323e-06, | |
| "loss": 0.49885687232017517, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.2089213300892134, | |
| "grad_norm": 0.5756494998931885, | |
| "learning_rate": 3.580038570826093e-06, | |
| "loss": 0.499514639377594, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 2.2102189781021897, | |
| "grad_norm": 0.5830073356628418, | |
| "learning_rate": 3.5690816251850657e-06, | |
| "loss": 0.4895148277282715, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.2115166261151664, | |
| "grad_norm": 0.6235371828079224, | |
| "learning_rate": 3.5581378291444223e-06, | |
| "loss": 0.5166549682617188, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 2.2128142741281427, | |
| "grad_norm": 0.5604133605957031, | |
| "learning_rate": 3.5472072050814565e-06, | |
| "loss": 0.4416266083717346, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.2141119221411194, | |
| "grad_norm": 0.5687461495399475, | |
| "learning_rate": 3.5362897753465265e-06, | |
| "loss": 0.48436877131462097, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 2.2154095701540957, | |
| "grad_norm": 0.5818923115730286, | |
| "learning_rate": 3.5253855622630174e-06, | |
| "loss": 0.5402669906616211, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.2167072181670724, | |
| "grad_norm": 0.6057185530662537, | |
| "learning_rate": 3.514494588127275e-06, | |
| "loss": 0.5666176080703735, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 2.2180048661800487, | |
| "grad_norm": 0.5755799412727356, | |
| "learning_rate": 3.5036168752085977e-06, | |
| "loss": 0.48957937955856323, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.219302514193025, | |
| "grad_norm": 0.5948247313499451, | |
| "learning_rate": 3.4927524457491456e-06, | |
| "loss": 0.4885704219341278, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 2.2206001622060016, | |
| "grad_norm": 0.5859489440917969, | |
| "learning_rate": 3.4819013219639295e-06, | |
| "loss": 0.4678208827972412, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.221897810218978, | |
| "grad_norm": 0.5540412068367004, | |
| "learning_rate": 3.471063526040752e-06, | |
| "loss": 0.481825053691864, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 2.2231954582319546, | |
| "grad_norm": 0.5437055826187134, | |
| "learning_rate": 3.460239080140163e-06, | |
| "loss": 0.4387455880641937, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.224493106244931, | |
| "grad_norm": 0.5966470241546631, | |
| "learning_rate": 3.4494280063954146e-06, | |
| "loss": 0.545790433883667, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 2.2257907542579076, | |
| "grad_norm": 0.5654957294464111, | |
| "learning_rate": 3.4386303269124142e-06, | |
| "loss": 0.4880921244621277, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.227088402270884, | |
| "grad_norm": 0.5839219689369202, | |
| "learning_rate": 3.4278460637696865e-06, | |
| "loss": 0.5272015333175659, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 2.2283860502838606, | |
| "grad_norm": 0.5752228498458862, | |
| "learning_rate": 3.4170752390183183e-06, | |
| "loss": 0.5249931812286377, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.229683698296837, | |
| "grad_norm": 0.580033540725708, | |
| "learning_rate": 3.4063178746819193e-06, | |
| "loss": 0.4954257309436798, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 2.2309813463098136, | |
| "grad_norm": 0.5703238844871521, | |
| "learning_rate": 3.395573992756579e-06, | |
| "loss": 0.502043604850769, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.23227899432279, | |
| "grad_norm": 0.5960628986358643, | |
| "learning_rate": 3.384843615210819e-06, | |
| "loss": 0.5299471616744995, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 2.2335766423357666, | |
| "grad_norm": 0.5959639549255371, | |
| "learning_rate": 3.3741267639855345e-06, | |
| "loss": 0.6064699292182922, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.234874290348743, | |
| "grad_norm": 0.5705887079238892, | |
| "learning_rate": 3.3634234609939888e-06, | |
| "loss": 0.49739521741867065, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 2.2361719383617196, | |
| "grad_norm": 0.5743765830993652, | |
| "learning_rate": 3.352733728121712e-06, | |
| "loss": 0.5017514228820801, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.237469586374696, | |
| "grad_norm": 0.5511932969093323, | |
| "learning_rate": 3.3420575872265184e-06, | |
| "loss": 0.4473830759525299, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 2.238767234387672, | |
| "grad_norm": 0.5601068139076233, | |
| "learning_rate": 3.3313950601384016e-06, | |
| "loss": 0.4705375134944916, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.240064882400649, | |
| "grad_norm": 0.5842630863189697, | |
| "learning_rate": 3.320746168659534e-06, | |
| "loss": 0.5488964319229126, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 2.241362530413625, | |
| "grad_norm": 0.5851315855979919, | |
| "learning_rate": 3.3101109345642056e-06, | |
| "loss": 0.4903653860092163, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.242660178426602, | |
| "grad_norm": 0.5913082361221313, | |
| "learning_rate": 3.299489379598777e-06, | |
| "loss": 0.5187092423439026, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 2.243957826439578, | |
| "grad_norm": 0.5963798761367798, | |
| "learning_rate": 3.288881525481639e-06, | |
| "loss": 0.5145666003227234, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.245255474452555, | |
| "grad_norm": 0.5765670537948608, | |
| "learning_rate": 3.278287393903172e-06, | |
| "loss": 0.47934818267822266, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 2.246553122465531, | |
| "grad_norm": 0.5776212215423584, | |
| "learning_rate": 3.2677070065256855e-06, | |
| "loss": 0.5102344751358032, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.247850770478508, | |
| "grad_norm": 0.5738791823387146, | |
| "learning_rate": 3.257140384983405e-06, | |
| "loss": 0.5097633600234985, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 2.249148418491484, | |
| "grad_norm": 0.5827375650405884, | |
| "learning_rate": 3.2465875508823876e-06, | |
| "loss": 0.49323970079421997, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.2504460665044608, | |
| "grad_norm": 0.5527526140213013, | |
| "learning_rate": 3.2360485258005115e-06, | |
| "loss": 0.47956135869026184, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 2.251743714517437, | |
| "grad_norm": 0.581285297870636, | |
| "learning_rate": 3.2255233312874155e-06, | |
| "loss": 0.5309310555458069, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.2530413625304138, | |
| "grad_norm": 0.6052958965301514, | |
| "learning_rate": 3.2150119888644594e-06, | |
| "loss": 0.5168576240539551, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 2.25433901054339, | |
| "grad_norm": 0.5458951592445374, | |
| "learning_rate": 3.2045145200246763e-06, | |
| "loss": 0.45663541555404663, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.2556366585563667, | |
| "grad_norm": 0.6066997647285461, | |
| "learning_rate": 3.1940309462327334e-06, | |
| "loss": 0.5442982912063599, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 2.256934306569343, | |
| "grad_norm": 0.5723252296447754, | |
| "learning_rate": 3.1835612889248868e-06, | |
| "loss": 0.5069276094436646, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.2582319545823197, | |
| "grad_norm": 0.571399986743927, | |
| "learning_rate": 3.1731055695089384e-06, | |
| "loss": 0.46238988637924194, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 2.259529602595296, | |
| "grad_norm": 0.5810062289237976, | |
| "learning_rate": 3.162663809364178e-06, | |
| "loss": 0.5127156972885132, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.2608272506082727, | |
| "grad_norm": 0.57572340965271, | |
| "learning_rate": 3.152236029841376e-06, | |
| "loss": 0.4930036664009094, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 2.262124898621249, | |
| "grad_norm": 0.580849826335907, | |
| "learning_rate": 3.1418222522626907e-06, | |
| "loss": 0.5655021071434021, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.2634225466342253, | |
| "grad_norm": 0.5487149953842163, | |
| "learning_rate": 3.1314224979216633e-06, | |
| "loss": 0.4654723107814789, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 2.264720194647202, | |
| "grad_norm": 0.5340819954872131, | |
| "learning_rate": 3.1210367880831684e-06, | |
| "loss": 0.4503304362297058, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.2660178426601782, | |
| "grad_norm": 0.5930841565132141, | |
| "learning_rate": 3.1106651439833434e-06, | |
| "loss": 0.5008471608161926, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 2.267315490673155, | |
| "grad_norm": 0.6097638010978699, | |
| "learning_rate": 3.1003075868295794e-06, | |
| "loss": 0.5474433898925781, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.2686131386861312, | |
| "grad_norm": 0.5703378319740295, | |
| "learning_rate": 3.0899641378004596e-06, | |
| "loss": 0.4988810420036316, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 2.269910786699108, | |
| "grad_norm": 0.5475755333900452, | |
| "learning_rate": 3.079634818045719e-06, | |
| "loss": 0.4420495927333832, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.2712084347120842, | |
| "grad_norm": 0.5802868008613586, | |
| "learning_rate": 3.069319648686202e-06, | |
| "loss": 0.4927031397819519, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 2.272506082725061, | |
| "grad_norm": 0.5564054846763611, | |
| "learning_rate": 3.0590186508138186e-06, | |
| "loss": 0.4879905581474304, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.273803730738037, | |
| "grad_norm": 0.5730741620063782, | |
| "learning_rate": 3.048731845491504e-06, | |
| "loss": 0.4577972888946533, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 2.275101378751014, | |
| "grad_norm": 0.5826799869537354, | |
| "learning_rate": 3.038459253753172e-06, | |
| "loss": 0.49198514223098755, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.27639902676399, | |
| "grad_norm": 0.5650803446769714, | |
| "learning_rate": 3.0282008966036647e-06, | |
| "loss": 0.48484641313552856, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 2.277696674776967, | |
| "grad_norm": 0.579980731010437, | |
| "learning_rate": 3.0179567950187396e-06, | |
| "loss": 0.4821101427078247, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.278994322789943, | |
| "grad_norm": 0.562907874584198, | |
| "learning_rate": 3.0077269699449795e-06, | |
| "loss": 0.47341495752334595, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 2.28029197080292, | |
| "grad_norm": 0.584148108959198, | |
| "learning_rate": 2.9975114422997932e-06, | |
| "loss": 0.48562386631965637, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.281589618815896, | |
| "grad_norm": 0.5975433588027954, | |
| "learning_rate": 2.9873102329713478e-06, | |
| "loss": 0.5041466951370239, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 2.2828872668288724, | |
| "grad_norm": 0.5545569062232971, | |
| "learning_rate": 2.9771233628185346e-06, | |
| "loss": 0.45113393664360046, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.284184914841849, | |
| "grad_norm": 0.5939710140228271, | |
| "learning_rate": 2.9669508526709256e-06, | |
| "loss": 0.550965428352356, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 2.285482562854826, | |
| "grad_norm": 0.6028052568435669, | |
| "learning_rate": 2.9567927233287307e-06, | |
| "loss": 0.5310263633728027, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.286780210867802, | |
| "grad_norm": 0.5738025903701782, | |
| "learning_rate": 2.9466489955627452e-06, | |
| "loss": 0.5576157569885254, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 2.2880778588807784, | |
| "grad_norm": 0.5776515007019043, | |
| "learning_rate": 2.936519690114338e-06, | |
| "loss": 0.4818328022956848, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.289375506893755, | |
| "grad_norm": 0.5612311363220215, | |
| "learning_rate": 2.9264048276953606e-06, | |
| "loss": 0.4919436573982239, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 2.2906731549067314, | |
| "grad_norm": 0.5739221572875977, | |
| "learning_rate": 2.9163044289881604e-06, | |
| "loss": 0.5123167634010315, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.291970802919708, | |
| "grad_norm": 0.5849712491035461, | |
| "learning_rate": 2.906218514645487e-06, | |
| "loss": 0.48645591735839844, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 2.2932684509326844, | |
| "grad_norm": 0.5921924114227295, | |
| "learning_rate": 2.8961471052904855e-06, | |
| "loss": 0.5228952169418335, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.294566098945661, | |
| "grad_norm": 0.5667364001274109, | |
| "learning_rate": 2.8860902215166374e-06, | |
| "loss": 0.4713795781135559, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 2.2958637469586374, | |
| "grad_norm": 0.5740687847137451, | |
| "learning_rate": 2.876047883887727e-06, | |
| "loss": 0.5572628974914551, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.297161394971614, | |
| "grad_norm": 0.5873590111732483, | |
| "learning_rate": 2.866020112937792e-06, | |
| "loss": 0.5043233036994934, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 2.2984590429845904, | |
| "grad_norm": 0.6047444343566895, | |
| "learning_rate": 2.8560069291710857e-06, | |
| "loss": 0.5389963984489441, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.299756690997567, | |
| "grad_norm": 0.5967015624046326, | |
| "learning_rate": 2.8460083530620342e-06, | |
| "loss": 0.5294721126556396, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 2.3010543390105433, | |
| "grad_norm": 0.549340546131134, | |
| "learning_rate": 2.8360244050551943e-06, | |
| "loss": 0.4317038357257843, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.30235198702352, | |
| "grad_norm": 0.5504307150840759, | |
| "learning_rate": 2.8260551055652154e-06, | |
| "loss": 0.529647946357727, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 2.3036496350364963, | |
| "grad_norm": 0.603110671043396, | |
| "learning_rate": 2.8161004749767893e-06, | |
| "loss": 0.5209970474243164, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.304947283049473, | |
| "grad_norm": 0.6039415001869202, | |
| "learning_rate": 2.8061605336446194e-06, | |
| "loss": 0.5043014287948608, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 2.3062449310624493, | |
| "grad_norm": 0.5883081555366516, | |
| "learning_rate": 2.796235301893362e-06, | |
| "loss": 0.4972041845321655, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.3075425790754256, | |
| "grad_norm": 0.5843275785446167, | |
| "learning_rate": 2.7863248000176146e-06, | |
| "loss": 0.4763846695423126, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 2.3088402270884023, | |
| "grad_norm": 0.5958689451217651, | |
| "learning_rate": 2.776429048281837e-06, | |
| "loss": 0.534402072429657, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.3101378751013786, | |
| "grad_norm": 0.5908694267272949, | |
| "learning_rate": 2.7665480669203383e-06, | |
| "loss": 0.5190926790237427, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 2.3114355231143553, | |
| "grad_norm": 0.5524806380271912, | |
| "learning_rate": 2.756681876137227e-06, | |
| "loss": 0.4656313359737396, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.3127331711273316, | |
| "grad_norm": 0.5877224206924438, | |
| "learning_rate": 2.7468304961063642e-06, | |
| "loss": 0.5328505635261536, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 2.3140308191403083, | |
| "grad_norm": 0.5791632533073425, | |
| "learning_rate": 2.736993946971329e-06, | |
| "loss": 0.49198758602142334, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.3153284671532846, | |
| "grad_norm": 0.5888563990592957, | |
| "learning_rate": 2.727172248845378e-06, | |
| "loss": 0.5110273957252502, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 2.3166261151662613, | |
| "grad_norm": 0.5828698873519897, | |
| "learning_rate": 2.717365421811389e-06, | |
| "loss": 0.5017109513282776, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.3179237631792375, | |
| "grad_norm": 0.5837040543556213, | |
| "learning_rate": 2.7075734859218526e-06, | |
| "loss": 0.48261111974716187, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 2.3192214111922143, | |
| "grad_norm": 0.5555887222290039, | |
| "learning_rate": 2.6977964611987885e-06, | |
| "loss": 0.47618377208709717, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.3205190592051905, | |
| "grad_norm": 0.5828522443771362, | |
| "learning_rate": 2.6880343676337485e-06, | |
| "loss": 0.5134596824645996, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 2.3218167072181672, | |
| "grad_norm": 0.5784159898757935, | |
| "learning_rate": 2.6782872251877347e-06, | |
| "loss": 0.5150825381278992, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.3231143552311435, | |
| "grad_norm": 0.5633057951927185, | |
| "learning_rate": 2.6685550537911886e-06, | |
| "loss": 0.5161488056182861, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 2.3244120032441202, | |
| "grad_norm": 0.6642704010009766, | |
| "learning_rate": 2.658837873343938e-06, | |
| "loss": 0.49425986409187317, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.3257096512570965, | |
| "grad_norm": 1.5263655185699463, | |
| "learning_rate": 2.6491357037151565e-06, | |
| "loss": 0.5067033767700195, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 2.3270072992700728, | |
| "grad_norm": 0.5753558278083801, | |
| "learning_rate": 2.639448564743328e-06, | |
| "loss": 0.5167245864868164, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.3283049472830495, | |
| "grad_norm": 0.576946496963501, | |
| "learning_rate": 2.6297764762362e-06, | |
| "loss": 0.4853561818599701, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 2.329602595296026, | |
| "grad_norm": 0.5866283774375916, | |
| "learning_rate": 2.6201194579707377e-06, | |
| "loss": 0.5048178434371948, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.3309002433090025, | |
| "grad_norm": 0.5844078660011292, | |
| "learning_rate": 2.6104775296931118e-06, | |
| "loss": 0.5524246096611023, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 2.3321978913219787, | |
| "grad_norm": 0.5873027443885803, | |
| "learning_rate": 2.6008507111186142e-06, | |
| "loss": 0.4834699034690857, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.3334955393349555, | |
| "grad_norm": 0.5751008987426758, | |
| "learning_rate": 2.5912390219316573e-06, | |
| "loss": 0.46085190773010254, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 2.3347931873479317, | |
| "grad_norm": 0.5933749675750732, | |
| "learning_rate": 2.5816424817857122e-06, | |
| "loss": 0.5757045745849609, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.3360908353609084, | |
| "grad_norm": 0.5685113668441772, | |
| "learning_rate": 2.572061110303271e-06, | |
| "loss": 0.5482950210571289, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 2.3373884833738847, | |
| "grad_norm": 0.5949112176895142, | |
| "learning_rate": 2.562494927075824e-06, | |
| "loss": 0.45071443915367126, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.3386861313868614, | |
| "grad_norm": 0.5924611687660217, | |
| "learning_rate": 2.552943951663782e-06, | |
| "loss": 0.5145446062088013, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 2.3399837793998377, | |
| "grad_norm": 0.6171916127204895, | |
| "learning_rate": 2.543408203596479e-06, | |
| "loss": 0.5408798456192017, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.3412814274128144, | |
| "grad_norm": 0.5777391791343689, | |
| "learning_rate": 2.5338877023721055e-06, | |
| "loss": 0.4972618818283081, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 2.3425790754257907, | |
| "grad_norm": 0.5500625371932983, | |
| "learning_rate": 2.5243824674576743e-06, | |
| "loss": 0.47741931676864624, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.3438767234387674, | |
| "grad_norm": 0.6426427960395813, | |
| "learning_rate": 2.514892518288988e-06, | |
| "loss": 0.4675457179546356, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 2.3451743714517437, | |
| "grad_norm": 0.5633028149604797, | |
| "learning_rate": 2.5054178742705936e-06, | |
| "loss": 0.4990037679672241, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.34647201946472, | |
| "grad_norm": 0.5860106945037842, | |
| "learning_rate": 2.4959585547757294e-06, | |
| "loss": 0.5247271060943604, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 2.3477696674776967, | |
| "grad_norm": 0.6035534143447876, | |
| "learning_rate": 2.486514579146322e-06, | |
| "loss": 0.5100830793380737, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.3490673154906734, | |
| "grad_norm": 0.5890262722969055, | |
| "learning_rate": 2.4770859666929027e-06, | |
| "loss": 0.4713430106639862, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 2.3503649635036497, | |
| "grad_norm": 0.5817517638206482, | |
| "learning_rate": 2.4676727366945995e-06, | |
| "loss": 0.5113362073898315, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.351662611516626, | |
| "grad_norm": 0.5895565748214722, | |
| "learning_rate": 2.4582749083990875e-06, | |
| "loss": 0.5131444931030273, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 2.3529602595296026, | |
| "grad_norm": 0.6126547455787659, | |
| "learning_rate": 2.448892501022544e-06, | |
| "loss": 0.5126985907554626, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.354257907542579, | |
| "grad_norm": 0.6138656139373779, | |
| "learning_rate": 2.4395255337496202e-06, | |
| "loss": 0.5113729238510132, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 2.3555555555555556, | |
| "grad_norm": 0.5864330530166626, | |
| "learning_rate": 2.4301740257333918e-06, | |
| "loss": 0.49038761854171753, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.356853203568532, | |
| "grad_norm": 0.5852108597755432, | |
| "learning_rate": 2.4208379960953255e-06, | |
| "loss": 0.5150374174118042, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 2.3581508515815086, | |
| "grad_norm": 0.5658332705497742, | |
| "learning_rate": 2.4115174639252425e-06, | |
| "loss": 0.45495855808258057, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.359448499594485, | |
| "grad_norm": 0.6017063856124878, | |
| "learning_rate": 2.4022124482812627e-06, | |
| "loss": 0.505713701248169, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 2.3607461476074616, | |
| "grad_norm": 0.5778226852416992, | |
| "learning_rate": 2.3929229681898005e-06, | |
| "loss": 0.5222234725952148, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.362043795620438, | |
| "grad_norm": 0.5651443004608154, | |
| "learning_rate": 2.3836490426454816e-06, | |
| "loss": 0.49572640657424927, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 2.3633414436334146, | |
| "grad_norm": 0.5689359307289124, | |
| "learning_rate": 2.3743906906111415e-06, | |
| "loss": 0.5316051840782166, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.364639091646391, | |
| "grad_norm": 0.5702098608016968, | |
| "learning_rate": 2.365147931017764e-06, | |
| "loss": 0.4997398257255554, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 2.3659367396593676, | |
| "grad_norm": 0.5760017037391663, | |
| "learning_rate": 2.355920782764455e-06, | |
| "loss": 0.48562324047088623, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.367234387672344, | |
| "grad_norm": 0.5816190242767334, | |
| "learning_rate": 2.3467092647183962e-06, | |
| "loss": 0.4969868063926697, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 2.3685320356853206, | |
| "grad_norm": 0.573274552822113, | |
| "learning_rate": 2.337513395714812e-06, | |
| "loss": 0.5109938383102417, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.369829683698297, | |
| "grad_norm": 0.6311878561973572, | |
| "learning_rate": 2.3283331945569256e-06, | |
| "loss": 0.5642886161804199, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 2.371127331711273, | |
| "grad_norm": 0.584414541721344, | |
| "learning_rate": 2.3191686800159272e-06, | |
| "loss": 0.4909813404083252, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.37242497972425, | |
| "grad_norm": 0.5963045954704285, | |
| "learning_rate": 2.310019870830923e-06, | |
| "loss": 0.5222618579864502, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 2.373722627737226, | |
| "grad_norm": 0.5990424752235413, | |
| "learning_rate": 2.300886785708919e-06, | |
| "loss": 0.527482271194458, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.375020275750203, | |
| "grad_norm": 0.5891411900520325, | |
| "learning_rate": 2.2917694433247626e-06, | |
| "loss": 0.5050874948501587, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 2.376317923763179, | |
| "grad_norm": 0.6118223071098328, | |
| "learning_rate": 2.282667862321104e-06, | |
| "loss": 0.5382136106491089, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.377615571776156, | |
| "grad_norm": 0.6039783358573914, | |
| "learning_rate": 2.2735820613083837e-06, | |
| "loss": 0.5693233013153076, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 2.378913219789132, | |
| "grad_norm": 0.5887247323989868, | |
| "learning_rate": 2.264512058864755e-06, | |
| "loss": 0.5109111666679382, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.3802108678021088, | |
| "grad_norm": 0.5879799723625183, | |
| "learning_rate": 2.2554578735360823e-06, | |
| "loss": 0.5213186740875244, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 2.381508515815085, | |
| "grad_norm": 0.5826606154441833, | |
| "learning_rate": 2.246419523835882e-06, | |
| "loss": 0.4647579789161682, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.3828061638280618, | |
| "grad_norm": 0.5773786306381226, | |
| "learning_rate": 2.2373970282452916e-06, | |
| "loss": 0.4783990681171417, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 2.384103811841038, | |
| "grad_norm": 0.5842030644416809, | |
| "learning_rate": 2.2283904052130313e-06, | |
| "loss": 0.5339592695236206, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.3854014598540147, | |
| "grad_norm": 0.569379985332489, | |
| "learning_rate": 2.2193996731553656e-06, | |
| "loss": 0.4958034157752991, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 2.386699107866991, | |
| "grad_norm": 0.6030622124671936, | |
| "learning_rate": 2.2104248504560643e-06, | |
| "loss": 0.4680197834968567, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.386699107866991, | |
| "eval_loss": 0.6960097551345825, | |
| "eval_runtime": 72.3931, | |
| "eval_samples_per_second": 71.72, | |
| "eval_steps_per_second": 8.965, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.3879967558799677, | |
| "grad_norm": 0.5678315758705139, | |
| "learning_rate": 2.2014659554663732e-06, | |
| "loss": 0.5050360560417175, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 2.389294403892944, | |
| "grad_norm": 0.5803557634353638, | |
| "learning_rate": 2.192523006504956e-06, | |
| "loss": 0.45793968439102173, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.3905920519059203, | |
| "grad_norm": 0.5823774933815002, | |
| "learning_rate": 2.183596021857891e-06, | |
| "loss": 0.4527888596057892, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 2.391889699918897, | |
| "grad_norm": 0.5696638226509094, | |
| "learning_rate": 2.1746850197785928e-06, | |
| "loss": 0.48019784688949585, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.3931873479318737, | |
| "grad_norm": 0.5827446579933167, | |
| "learning_rate": 2.16579001848781e-06, | |
| "loss": 0.5040067434310913, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 2.39448499594485, | |
| "grad_norm": 0.5871142148971558, | |
| "learning_rate": 2.156911036173568e-06, | |
| "loss": 0.47293055057525635, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.3957826439578263, | |
| "grad_norm": 0.558737576007843, | |
| "learning_rate": 2.1480480909911384e-06, | |
| "loss": 0.47470247745513916, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 2.397080291970803, | |
| "grad_norm": 0.5871817469596863, | |
| "learning_rate": 2.139201201062999e-06, | |
| "loss": 0.5189757347106934, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.3983779399837792, | |
| "grad_norm": 0.5788654088973999, | |
| "learning_rate": 2.130370384478807e-06, | |
| "loss": 0.49212944507598877, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 2.399675587996756, | |
| "grad_norm": 0.6011954545974731, | |
| "learning_rate": 2.1215556592953357e-06, | |
| "loss": 0.5247466564178467, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.4009732360097322, | |
| "grad_norm": 0.5478853583335876, | |
| "learning_rate": 2.11275704353648e-06, | |
| "loss": 0.4548777937889099, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 2.402270884022709, | |
| "grad_norm": 0.5758265852928162, | |
| "learning_rate": 2.10397455519317e-06, | |
| "loss": 0.5072181224822998, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.403568532035685, | |
| "grad_norm": 0.5652422308921814, | |
| "learning_rate": 2.095208212223383e-06, | |
| "loss": 0.524145245552063, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 2.404866180048662, | |
| "grad_norm": 0.5495245456695557, | |
| "learning_rate": 2.0864580325520623e-06, | |
| "loss": 0.47712084650993347, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.406163828061638, | |
| "grad_norm": 0.5936484932899475, | |
| "learning_rate": 2.077724034071116e-06, | |
| "loss": 0.5134607553482056, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 2.407461476074615, | |
| "grad_norm": 0.5818508863449097, | |
| "learning_rate": 2.069006234639357e-06, | |
| "loss": 0.46304088830947876, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.408759124087591, | |
| "grad_norm": 0.6046934723854065, | |
| "learning_rate": 2.060304652082481e-06, | |
| "loss": 0.5234611630439758, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 2.410056772100568, | |
| "grad_norm": 0.6409534215927124, | |
| "learning_rate": 2.051619304193022e-06, | |
| "loss": 0.5672463178634644, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.411354420113544, | |
| "grad_norm": 0.5750660300254822, | |
| "learning_rate": 2.0429502087303164e-06, | |
| "loss": 0.4885750710964203, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 2.412652068126521, | |
| "grad_norm": 0.6407312750816345, | |
| "learning_rate": 2.0342973834204715e-06, | |
| "loss": 0.4792509973049164, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.413949716139497, | |
| "grad_norm": 0.5465012192726135, | |
| "learning_rate": 2.0256608459563244e-06, | |
| "loss": 0.4969291388988495, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 2.4152473641524734, | |
| "grad_norm": 0.5713889002799988, | |
| "learning_rate": 2.017040613997412e-06, | |
| "loss": 0.48591309785842896, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.41654501216545, | |
| "grad_norm": 0.5666239857673645, | |
| "learning_rate": 2.008436705169917e-06, | |
| "loss": 0.44293344020843506, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 2.4178426601784264, | |
| "grad_norm": 0.5586820244789124, | |
| "learning_rate": 1.9998491370666684e-06, | |
| "loss": 0.45493143796920776, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.419140308191403, | |
| "grad_norm": 0.5613408088684082, | |
| "learning_rate": 1.991277927247056e-06, | |
| "loss": 0.49673575162887573, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 2.4204379562043794, | |
| "grad_norm": 0.5929522514343262, | |
| "learning_rate": 1.9827230932370467e-06, | |
| "loss": 0.5190791487693787, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.421735604217356, | |
| "grad_norm": 0.5624476075172424, | |
| "learning_rate": 1.9741846525291033e-06, | |
| "loss": 0.4601350724697113, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 2.4230332522303324, | |
| "grad_norm": 0.5859534740447998, | |
| "learning_rate": 1.9656626225821774e-06, | |
| "loss": 0.4977201819419861, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.424330900243309, | |
| "grad_norm": 0.5921490788459778, | |
| "learning_rate": 1.957157020821664e-06, | |
| "loss": 0.5139193534851074, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 2.4256285482562854, | |
| "grad_norm": 0.5974218845367432, | |
| "learning_rate": 1.9486678646393654e-06, | |
| "loss": 0.5071057081222534, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.426926196269262, | |
| "grad_norm": 0.5919764041900635, | |
| "learning_rate": 1.9401951713934574e-06, | |
| "loss": 0.49057209491729736, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 2.4282238442822384, | |
| "grad_norm": 0.5927568674087524, | |
| "learning_rate": 1.931738958408457e-06, | |
| "loss": 0.5092151165008545, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.429521492295215, | |
| "grad_norm": 0.5767861604690552, | |
| "learning_rate": 1.9232992429751694e-06, | |
| "loss": 0.4838736355304718, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 2.4308191403081914, | |
| "grad_norm": 0.5671409964561462, | |
| "learning_rate": 1.9148760423506884e-06, | |
| "loss": 0.4564237594604492, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.432116788321168, | |
| "grad_norm": 0.5710315108299255, | |
| "learning_rate": 1.9064693737583173e-06, | |
| "loss": 0.5324878096580505, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 2.4334144363341443, | |
| "grad_norm": 0.5930359959602356, | |
| "learning_rate": 1.8980792543875758e-06, | |
| "loss": 0.5325191617012024, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.4347120843471206, | |
| "grad_norm": 0.5865573287010193, | |
| "learning_rate": 1.8897057013941256e-06, | |
| "loss": 0.4776073694229126, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 2.4360097323600973, | |
| "grad_norm": 0.5611563920974731, | |
| "learning_rate": 1.8813487318997658e-06, | |
| "loss": 0.5060328841209412, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.437307380373074, | |
| "grad_norm": 0.5972756147384644, | |
| "learning_rate": 1.8730083629923857e-06, | |
| "loss": 0.4804626405239105, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 2.4386050283860503, | |
| "grad_norm": 0.5864998698234558, | |
| "learning_rate": 1.8646846117259277e-06, | |
| "loss": 0.49063995480537415, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.4399026763990266, | |
| "grad_norm": 0.5928548574447632, | |
| "learning_rate": 1.856377495120355e-06, | |
| "loss": 0.5291346311569214, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 2.4412003244120033, | |
| "grad_norm": 0.5551499724388123, | |
| "learning_rate": 1.8480870301616227e-06, | |
| "loss": 0.5005500912666321, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.4424979724249796, | |
| "grad_norm": 0.5950235724449158, | |
| "learning_rate": 1.839813233801626e-06, | |
| "loss": 0.5388972759246826, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 2.4437956204379563, | |
| "grad_norm": 0.5625823736190796, | |
| "learning_rate": 1.8315561229581925e-06, | |
| "loss": 0.49611175060272217, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.4450932684509326, | |
| "grad_norm": 0.5934765934944153, | |
| "learning_rate": 1.8233157145150183e-06, | |
| "loss": 0.5419527292251587, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 2.4463909164639093, | |
| "grad_norm": 0.5831634402275085, | |
| "learning_rate": 1.8150920253216542e-06, | |
| "loss": 0.5380743145942688, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.4476885644768855, | |
| "grad_norm": 0.5773998498916626, | |
| "learning_rate": 1.8068850721934639e-06, | |
| "loss": 0.5360612869262695, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 2.4489862124898623, | |
| "grad_norm": 0.5667778253555298, | |
| "learning_rate": 1.7986948719115872e-06, | |
| "loss": 0.4837849736213684, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.4502838605028385, | |
| "grad_norm": 0.5844002962112427, | |
| "learning_rate": 1.7905214412229177e-06, | |
| "loss": 0.5097035765647888, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 2.4515815085158152, | |
| "grad_norm": 0.571603536605835, | |
| "learning_rate": 1.7823647968400437e-06, | |
| "loss": 0.4986342787742615, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.4528791565287915, | |
| "grad_norm": 0.5814788341522217, | |
| "learning_rate": 1.7742249554412426e-06, | |
| "loss": 0.5466139316558838, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 2.4541768045417682, | |
| "grad_norm": 0.602313756942749, | |
| "learning_rate": 1.76610193367043e-06, | |
| "loss": 0.5179327726364136, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.4554744525547445, | |
| "grad_norm": 0.5728641748428345, | |
| "learning_rate": 1.757995748137129e-06, | |
| "loss": 0.4758206903934479, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 2.456772100567721, | |
| "grad_norm": 0.5834367871284485, | |
| "learning_rate": 1.7499064154164358e-06, | |
| "loss": 0.48661813139915466, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.4580697485806975, | |
| "grad_norm": 0.6014889478683472, | |
| "learning_rate": 1.7418339520489936e-06, | |
| "loss": 0.5374865531921387, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 2.4593673965936738, | |
| "grad_norm": 0.5678799152374268, | |
| "learning_rate": 1.7337783745409363e-06, | |
| "loss": 0.47202199697494507, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.4606650446066505, | |
| "grad_norm": 0.5770121216773987, | |
| "learning_rate": 1.7257396993638942e-06, | |
| "loss": 0.4832342565059662, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 2.4619626926196267, | |
| "grad_norm": 0.5571733713150024, | |
| "learning_rate": 1.717717942954914e-06, | |
| "loss": 0.5462654829025269, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.4632603406326035, | |
| "grad_norm": 0.5752882361412048, | |
| "learning_rate": 1.7097131217164598e-06, | |
| "loss": 0.5042911171913147, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 2.4645579886455797, | |
| "grad_norm": 0.5651837587356567, | |
| "learning_rate": 1.7017252520163652e-06, | |
| "loss": 0.5055532455444336, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.4658556366585564, | |
| "grad_norm": 0.5626855492591858, | |
| "learning_rate": 1.6937543501878018e-06, | |
| "loss": 0.5025293827056885, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 2.4671532846715327, | |
| "grad_norm": 0.5588532090187073, | |
| "learning_rate": 1.6858004325292466e-06, | |
| "loss": 0.5056187510490417, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.4684509326845094, | |
| "grad_norm": 0.6047312021255493, | |
| "learning_rate": 1.6778635153044486e-06, | |
| "loss": 0.5340344309806824, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 2.4697485806974857, | |
| "grad_norm": 0.5701199769973755, | |
| "learning_rate": 1.6699436147423942e-06, | |
| "loss": 0.47314453125, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.4710462287104624, | |
| "grad_norm": 0.5887412428855896, | |
| "learning_rate": 1.662040747037277e-06, | |
| "loss": 0.5806034207344055, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 2.4723438767234387, | |
| "grad_norm": 0.5856630206108093, | |
| "learning_rate": 1.654154928348455e-06, | |
| "loss": 0.542724609375, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.4736415247364154, | |
| "grad_norm": 0.5869402885437012, | |
| "learning_rate": 1.646286174800441e-06, | |
| "loss": 0.5193344354629517, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 2.4749391727493917, | |
| "grad_norm": 0.5962528586387634, | |
| "learning_rate": 1.6384345024828374e-06, | |
| "loss": 0.49579355120658875, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.4762368207623684, | |
| "grad_norm": 0.5732969641685486, | |
| "learning_rate": 1.6305999274503282e-06, | |
| "loss": 0.4678477346897125, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 2.4775344687753447, | |
| "grad_norm": 0.5851303339004517, | |
| "learning_rate": 1.6227824657226366e-06, | |
| "loss": 0.4453192949295044, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.478832116788321, | |
| "grad_norm": 0.5631725192070007, | |
| "learning_rate": 1.614982133284495e-06, | |
| "loss": 0.47414714097976685, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 2.4801297648012977, | |
| "grad_norm": 0.5917407274246216, | |
| "learning_rate": 1.6071989460856063e-06, | |
| "loss": 0.51967453956604, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.4814274128142744, | |
| "grad_norm": 0.5762115716934204, | |
| "learning_rate": 1.5994329200406223e-06, | |
| "loss": 0.47164011001586914, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 2.4827250608272506, | |
| "grad_norm": 0.5615324974060059, | |
| "learning_rate": 1.5916840710290937e-06, | |
| "loss": 0.5057311058044434, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.484022708840227, | |
| "grad_norm": 0.5691003203392029, | |
| "learning_rate": 1.5839524148954622e-06, | |
| "loss": 0.46432405710220337, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 2.4853203568532036, | |
| "grad_norm": 0.5725374221801758, | |
| "learning_rate": 1.5762379674490048e-06, | |
| "loss": 0.46116703748703003, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.48661800486618, | |
| "grad_norm": 0.6240981221199036, | |
| "learning_rate": 1.5685407444638146e-06, | |
| "loss": 0.5304262638092041, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 2.4879156528791566, | |
| "grad_norm": 0.5866638422012329, | |
| "learning_rate": 1.5608607616787663e-06, | |
| "loss": 0.46918168663978577, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.489213300892133, | |
| "grad_norm": 0.6103445291519165, | |
| "learning_rate": 1.553198034797474e-06, | |
| "loss": 0.5785281658172607, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 2.4905109489051096, | |
| "grad_norm": 0.5748964548110962, | |
| "learning_rate": 1.5455525794882841e-06, | |
| "loss": 0.47489288449287415, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.491808596918086, | |
| "grad_norm": 0.5849605202674866, | |
| "learning_rate": 1.5379244113842106e-06, | |
| "loss": 0.5081884860992432, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 2.4931062449310626, | |
| "grad_norm": 0.5827904343605042, | |
| "learning_rate": 1.53031354608293e-06, | |
| "loss": 0.5528438091278076, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.494403892944039, | |
| "grad_norm": 0.5817930102348328, | |
| "learning_rate": 1.5227199991467335e-06, | |
| "loss": 0.5150377154350281, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 2.4957015409570156, | |
| "grad_norm": 0.5756059288978577, | |
| "learning_rate": 1.5151437861025032e-06, | |
| "loss": 0.4410705268383026, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.496999188969992, | |
| "grad_norm": 0.5646528005599976, | |
| "learning_rate": 1.5075849224416783e-06, | |
| "loss": 0.5073448419570923, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 2.4982968369829686, | |
| "grad_norm": 0.5877253413200378, | |
| "learning_rate": 1.5000434236202211e-06, | |
| "loss": 0.5140043497085571, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.499594484995945, | |
| "grad_norm": 0.5703092813491821, | |
| "learning_rate": 1.4925193050585873e-06, | |
| "loss": 0.5106258392333984, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 2.5008921330089215, | |
| "grad_norm": 0.5841608643531799, | |
| "learning_rate": 1.4850125821416983e-06, | |
| "loss": 0.49111461639404297, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.502189781021898, | |
| "grad_norm": 0.5806940197944641, | |
| "learning_rate": 1.4775232702188947e-06, | |
| "loss": 0.477137953042984, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 2.503487429034874, | |
| "grad_norm": 0.5762841105461121, | |
| "learning_rate": 1.4700513846039332e-06, | |
| "loss": 0.4592735469341278, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.504785077047851, | |
| "grad_norm": 0.5808306932449341, | |
| "learning_rate": 1.4625969405749218e-06, | |
| "loss": 0.5200600624084473, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 2.5060827250608275, | |
| "grad_norm": 0.5846347212791443, | |
| "learning_rate": 1.4551599533743155e-06, | |
| "loss": 0.5185432434082031, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.507380373073804, | |
| "grad_norm": 0.6160796284675598, | |
| "learning_rate": 1.4477404382088689e-06, | |
| "loss": 0.5391091108322144, | |
| "step": 1933 | |
| }, | |
| { | |
| "epoch": 2.50867802108678, | |
| "grad_norm": 0.5582398176193237, | |
| "learning_rate": 1.4403384102496132e-06, | |
| "loss": 0.4704029858112335, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.509975669099757, | |
| "grad_norm": 0.5653654932975769, | |
| "learning_rate": 1.4329538846318225e-06, | |
| "loss": 0.524503231048584, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 2.511273317112733, | |
| "grad_norm": 0.5886475443840027, | |
| "learning_rate": 1.4255868764549852e-06, | |
| "loss": 0.4819219708442688, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.5125709651257098, | |
| "grad_norm": 0.5662146806716919, | |
| "learning_rate": 1.4182374007827605e-06, | |
| "loss": 0.5265961289405823, | |
| "step": 1937 | |
| }, | |
| { | |
| "epoch": 2.513868613138686, | |
| "grad_norm": 0.5975550413131714, | |
| "learning_rate": 1.410905472642975e-06, | |
| "loss": 0.5036963224411011, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.5151662611516628, | |
| "grad_norm": 0.5727776885032654, | |
| "learning_rate": 1.4035911070275576e-06, | |
| "loss": 0.4989280104637146, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 2.516463909164639, | |
| "grad_norm": 0.6097977161407471, | |
| "learning_rate": 1.3962943188925438e-06, | |
| "loss": 0.535049557685852, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.5177615571776153, | |
| "grad_norm": 0.5695138573646545, | |
| "learning_rate": 1.3890151231580117e-06, | |
| "loss": 0.5146960020065308, | |
| "step": 1941 | |
| }, | |
| { | |
| "epoch": 2.519059205190592, | |
| "grad_norm": 0.5890569686889648, | |
| "learning_rate": 1.3817535347080768e-06, | |
| "loss": 0.5350029468536377, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.5203568532035687, | |
| "grad_norm": 0.5916978120803833, | |
| "learning_rate": 1.3745095683908482e-06, | |
| "loss": 0.5213718414306641, | |
| "step": 1943 | |
| }, | |
| { | |
| "epoch": 2.521654501216545, | |
| "grad_norm": 0.5767956972122192, | |
| "learning_rate": 1.3672832390184042e-06, | |
| "loss": 0.506149411201477, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.5229521492295213, | |
| "grad_norm": 0.5916143655776978, | |
| "learning_rate": 1.3600745613667598e-06, | |
| "loss": 0.5128974318504333, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 2.524249797242498, | |
| "grad_norm": 0.5634325742721558, | |
| "learning_rate": 1.3528835501758365e-06, | |
| "loss": 0.5004685521125793, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.5255474452554747, | |
| "grad_norm": 0.5783470869064331, | |
| "learning_rate": 1.345710220149431e-06, | |
| "loss": 0.5014833807945251, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 2.526845093268451, | |
| "grad_norm": 0.5838568210601807, | |
| "learning_rate": 1.3385545859551886e-06, | |
| "loss": 0.540973424911499, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.5281427412814272, | |
| "grad_norm": 0.5862357020378113, | |
| "learning_rate": 1.3314166622245717e-06, | |
| "loss": 0.5124210715293884, | |
| "step": 1949 | |
| }, | |
| { | |
| "epoch": 2.529440389294404, | |
| "grad_norm": 0.5789701342582703, | |
| "learning_rate": 1.324296463552821e-06, | |
| "loss": 0.4796435236930847, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.5307380373073802, | |
| "grad_norm": 0.5998684167861938, | |
| "learning_rate": 1.3171940044989495e-06, | |
| "loss": 0.5745923519134521, | |
| "step": 1951 | |
| }, | |
| { | |
| "epoch": 2.532035685320357, | |
| "grad_norm": 0.5753020644187927, | |
| "learning_rate": 1.3101092995856802e-06, | |
| "loss": 0.4947076439857483, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.533333333333333, | |
| "grad_norm": 0.5820896029472351, | |
| "learning_rate": 1.3030423632994493e-06, | |
| "loss": 0.4961175322532654, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 2.53463098134631, | |
| "grad_norm": 0.5677821040153503, | |
| "learning_rate": 1.2959932100903472e-06, | |
| "loss": 0.49631717801094055, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.535928629359286, | |
| "grad_norm": 0.5767098665237427, | |
| "learning_rate": 1.2889618543721094e-06, | |
| "loss": 0.5189783573150635, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 2.537226277372263, | |
| "grad_norm": 0.5949708819389343, | |
| "learning_rate": 1.2819483105220798e-06, | |
| "loss": 0.5087240934371948, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.538523925385239, | |
| "grad_norm": 0.582380473613739, | |
| "learning_rate": 1.274952592881179e-06, | |
| "loss": 0.48820894956588745, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 2.539821573398216, | |
| "grad_norm": 0.578072726726532, | |
| "learning_rate": 1.2679747157538801e-06, | |
| "loss": 0.5089854598045349, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.541119221411192, | |
| "grad_norm": 0.5774610042572021, | |
| "learning_rate": 1.2610146934081768e-06, | |
| "loss": 0.49252915382385254, | |
| "step": 1959 | |
| }, | |
| { | |
| "epoch": 2.5424168694241684, | |
| "grad_norm": 0.58970707654953, | |
| "learning_rate": 1.2540725400755472e-06, | |
| "loss": 0.5605252981185913, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.543714517437145, | |
| "grad_norm": 0.566736102104187, | |
| "learning_rate": 1.2471482699509463e-06, | |
| "loss": 0.5428552627563477, | |
| "step": 1961 | |
| }, | |
| { | |
| "epoch": 2.545012165450122, | |
| "grad_norm": 0.5720308423042297, | |
| "learning_rate": 1.2402418971927487e-06, | |
| "loss": 0.5265427827835083, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.546309813463098, | |
| "grad_norm": 0.5800856351852417, | |
| "learning_rate": 1.2333534359227383e-06, | |
| "loss": 0.5138852596282959, | |
| "step": 1963 | |
| }, | |
| { | |
| "epoch": 2.5476074614760744, | |
| "grad_norm": 0.5780075788497925, | |
| "learning_rate": 1.226482900226077e-06, | |
| "loss": 0.48286569118499756, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.548905109489051, | |
| "grad_norm": 0.5666484236717224, | |
| "learning_rate": 1.2196303041512714e-06, | |
| "loss": 0.5184611082077026, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 2.550202757502028, | |
| "grad_norm": 0.5936673879623413, | |
| "learning_rate": 1.2127956617101445e-06, | |
| "loss": 0.5331882238388062, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.551500405515004, | |
| "grad_norm": 0.5658625364303589, | |
| "learning_rate": 1.2059789868778116e-06, | |
| "loss": 0.5007424354553223, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 2.5527980535279804, | |
| "grad_norm": 0.5596531629562378, | |
| "learning_rate": 1.1991802935926455e-06, | |
| "loss": 0.4455481767654419, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.554095701540957, | |
| "grad_norm": 0.5873602628707886, | |
| "learning_rate": 1.1923995957562585e-06, | |
| "loss": 0.4800918698310852, | |
| "step": 1969 | |
| }, | |
| { | |
| "epoch": 2.5553933495539334, | |
| "grad_norm": 0.5768440961837769, | |
| "learning_rate": 1.1856369072334517e-06, | |
| "loss": 0.5240867733955383, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.55669099756691, | |
| "grad_norm": 0.5888426899909973, | |
| "learning_rate": 1.178892241852222e-06, | |
| "loss": 0.4650096893310547, | |
| "step": 1971 | |
| }, | |
| { | |
| "epoch": 2.5579886455798864, | |
| "grad_norm": 0.5748341083526611, | |
| "learning_rate": 1.1721656134036962e-06, | |
| "loss": 0.5009864568710327, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.559286293592863, | |
| "grad_norm": 0.5902095437049866, | |
| "learning_rate": 1.165457035642128e-06, | |
| "loss": 0.5109707117080688, | |
| "step": 1973 | |
| }, | |
| { | |
| "epoch": 2.5605839416058394, | |
| "grad_norm": 0.6467815041542053, | |
| "learning_rate": 1.1587665222848643e-06, | |
| "loss": 0.4991541802883148, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.5618815896188156, | |
| "grad_norm": 0.5866140127182007, | |
| "learning_rate": 1.1520940870123065e-06, | |
| "loss": 0.48706984519958496, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 2.5631792376317923, | |
| "grad_norm": 0.5842229127883911, | |
| "learning_rate": 1.1454397434679022e-06, | |
| "loss": 0.5219037532806396, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.564476885644769, | |
| "grad_norm": 0.5731110572814941, | |
| "learning_rate": 1.1388035052580936e-06, | |
| "loss": 0.5115249156951904, | |
| "step": 1977 | |
| }, | |
| { | |
| "epoch": 2.5657745336577453, | |
| "grad_norm": 0.5784810185432434, | |
| "learning_rate": 1.1321853859523113e-06, | |
| "loss": 0.49307000637054443, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.5670721816707216, | |
| "grad_norm": 0.5523423552513123, | |
| "learning_rate": 1.1255853990829323e-06, | |
| "loss": 0.4534381031990051, | |
| "step": 1979 | |
| }, | |
| { | |
| "epoch": 2.5683698296836983, | |
| "grad_norm": 0.576626718044281, | |
| "learning_rate": 1.119003558145262e-06, | |
| "loss": 0.5025165677070618, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.569667477696675, | |
| "grad_norm": 0.6068827509880066, | |
| "learning_rate": 1.1124398765974976e-06, | |
| "loss": 0.5154992341995239, | |
| "step": 1981 | |
| }, | |
| { | |
| "epoch": 2.5709651257096513, | |
| "grad_norm": 0.5544149875640869, | |
| "learning_rate": 1.1058943678607082e-06, | |
| "loss": 0.4641039967536926, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.5722627737226276, | |
| "grad_norm": 0.591013491153717, | |
| "learning_rate": 1.0993670453187965e-06, | |
| "loss": 0.5354744791984558, | |
| "step": 1983 | |
| }, | |
| { | |
| "epoch": 2.5735604217356043, | |
| "grad_norm": 0.5729239583015442, | |
| "learning_rate": 1.0928579223184943e-06, | |
| "loss": 0.4895523190498352, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.5748580697485806, | |
| "grad_norm": 0.5629091858863831, | |
| "learning_rate": 1.0863670121693037e-06, | |
| "loss": 0.4998272955417633, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 2.5761557177615573, | |
| "grad_norm": 0.5692305564880371, | |
| "learning_rate": 1.0798943281434958e-06, | |
| "loss": 0.5316153764724731, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.5774533657745335, | |
| "grad_norm": 0.5875282287597656, | |
| "learning_rate": 1.0734398834760695e-06, | |
| "loss": 0.47188982367515564, | |
| "step": 1987 | |
| }, | |
| { | |
| "epoch": 2.5787510137875103, | |
| "grad_norm": 0.613525927066803, | |
| "learning_rate": 1.067003691364733e-06, | |
| "loss": 0.5325276851654053, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.5800486618004865, | |
| "grad_norm": 0.5971388816833496, | |
| "learning_rate": 1.060585764969867e-06, | |
| "loss": 0.5428590774536133, | |
| "step": 1989 | |
| }, | |
| { | |
| "epoch": 2.5813463098134632, | |
| "grad_norm": 0.5674665570259094, | |
| "learning_rate": 1.0541861174145097e-06, | |
| "loss": 0.47022098302841187, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.5826439578264395, | |
| "grad_norm": 0.5619399547576904, | |
| "learning_rate": 1.047804761784319e-06, | |
| "loss": 0.48155295848846436, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 2.5839416058394162, | |
| "grad_norm": 0.5751737952232361, | |
| "learning_rate": 1.0414417111275533e-06, | |
| "loss": 0.5390469431877136, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.5852392538523925, | |
| "grad_norm": 0.5782447457313538, | |
| "learning_rate": 1.0350969784550368e-06, | |
| "loss": 0.5048004984855652, | |
| "step": 1993 | |
| }, | |
| { | |
| "epoch": 2.5865369018653688, | |
| "grad_norm": 0.5656158328056335, | |
| "learning_rate": 1.028770576740148e-06, | |
| "loss": 0.5237029194831848, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.5878345498783455, | |
| "grad_norm": 0.568681538105011, | |
| "learning_rate": 1.022462518918772e-06, | |
| "loss": 0.4539422392845154, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 2.589132197891322, | |
| "grad_norm": 0.560100793838501, | |
| "learning_rate": 1.0161728178892928e-06, | |
| "loss": 0.45414865016937256, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.5904298459042985, | |
| "grad_norm": 0.5668950080871582, | |
| "learning_rate": 1.0099014865125557e-06, | |
| "loss": 0.4774186611175537, | |
| "step": 1997 | |
| }, | |
| { | |
| "epoch": 2.5917274939172747, | |
| "grad_norm": 0.606434166431427, | |
| "learning_rate": 1.0036485376118477e-06, | |
| "loss": 0.565065324306488, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.5930251419302515, | |
| "grad_norm": 0.5841239094734192, | |
| "learning_rate": 9.974139839728658e-07, | |
| "loss": 0.5483173131942749, | |
| "step": 1999 | |
| }, | |
| { | |
| "epoch": 2.5943227899432277, | |
| "grad_norm": 0.591903805732727, | |
| "learning_rate": 9.91197838343696e-07, | |
| "loss": 0.539207398891449, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.5956204379562045, | |
| "grad_norm": 0.5807414650917053, | |
| "learning_rate": 9.850001134347765e-07, | |
| "loss": 0.5179691314697266, | |
| "step": 2001 | |
| }, | |
| { | |
| "epoch": 2.5969180859691807, | |
| "grad_norm": 0.5769233107566833, | |
| "learning_rate": 9.788208219188932e-07, | |
| "loss": 0.4748839735984802, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.5982157339821574, | |
| "grad_norm": 0.5766239762306213, | |
| "learning_rate": 9.726599764311318e-07, | |
| "loss": 0.48025619983673096, | |
| "step": 2003 | |
| }, | |
| { | |
| "epoch": 2.5995133819951337, | |
| "grad_norm": 0.5754262208938599, | |
| "learning_rate": 9.665175895688594e-07, | |
| "loss": 0.47812211513519287, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.6008110300081104, | |
| "grad_norm": 0.5699096918106079, | |
| "learning_rate": 9.603936738917063e-07, | |
| "loss": 0.5337727069854736, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 2.6021086780210867, | |
| "grad_norm": 0.6039567589759827, | |
| "learning_rate": 9.54288241921525e-07, | |
| "loss": 0.5216813087463379, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.6034063260340634, | |
| "grad_norm": 0.5594240427017212, | |
| "learning_rate": 9.482013061423833e-07, | |
| "loss": 0.5251287221908569, | |
| "step": 2007 | |
| }, | |
| { | |
| "epoch": 2.6047039740470397, | |
| "grad_norm": 0.5856126546859741, | |
| "learning_rate": 9.421328790005213e-07, | |
| "loss": 0.5040426850318909, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.606001622060016, | |
| "grad_norm": 0.5794676542282104, | |
| "learning_rate": 9.360829729043375e-07, | |
| "loss": 0.5068378448486328, | |
| "step": 2009 | |
| }, | |
| { | |
| "epoch": 2.6072992700729927, | |
| "grad_norm": 0.5879704356193542, | |
| "learning_rate": 9.300516002243587e-07, | |
| "loss": 0.5116778016090393, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.6085969180859694, | |
| "grad_norm": 0.5978105068206787, | |
| "learning_rate": 9.240387732932155e-07, | |
| "loss": 0.525846004486084, | |
| "step": 2011 | |
| }, | |
| { | |
| "epoch": 2.6098945660989457, | |
| "grad_norm": 0.5788280367851257, | |
| "learning_rate": 9.180445044056164e-07, | |
| "loss": 0.5172775983810425, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.611192214111922, | |
| "grad_norm": 0.5901548862457275, | |
| "learning_rate": 9.120688058183269e-07, | |
| "loss": 0.5301088094711304, | |
| "step": 2013 | |
| }, | |
| { | |
| "epoch": 2.6124898621248986, | |
| "grad_norm": 0.5967061519622803, | |
| "learning_rate": 9.061116897501321e-07, | |
| "loss": 0.5318504571914673, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.6137875101378754, | |
| "grad_norm": 0.5555222034454346, | |
| "learning_rate": 9.001731683818338e-07, | |
| "loss": 0.5011588335037231, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 2.6150851581508516, | |
| "grad_norm": 0.613298237323761, | |
| "learning_rate": 8.942532538561988e-07, | |
| "loss": 0.5700482130050659, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.616382806163828, | |
| "grad_norm": 0.599183201789856, | |
| "learning_rate": 8.883519582779598e-07, | |
| "loss": 0.5524272322654724, | |
| "step": 2017 | |
| }, | |
| { | |
| "epoch": 2.6176804541768046, | |
| "grad_norm": 0.6120027899742126, | |
| "learning_rate": 8.82469293713768e-07, | |
| "loss": 0.47205424308776855, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.618978102189781, | |
| "grad_norm": 0.5907730460166931, | |
| "learning_rate": 8.766052721921858e-07, | |
| "loss": 0.507009744644165, | |
| "step": 2019 | |
| }, | |
| { | |
| "epoch": 2.6202757502027576, | |
| "grad_norm": 0.5603318810462952, | |
| "learning_rate": 8.70759905703652e-07, | |
| "loss": 0.48432788252830505, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.621573398215734, | |
| "grad_norm": 0.5962936282157898, | |
| "learning_rate": 8.649332062004622e-07, | |
| "loss": 0.4898841381072998, | |
| "step": 2021 | |
| }, | |
| { | |
| "epoch": 2.6228710462287106, | |
| "grad_norm": 0.7598771452903748, | |
| "learning_rate": 8.59125185596742e-07, | |
| "loss": 0.5321274995803833, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.624168694241687, | |
| "grad_norm": 0.5821399092674255, | |
| "learning_rate": 8.533358557684246e-07, | |
| "loss": 0.512812614440918, | |
| "step": 2023 | |
| }, | |
| { | |
| "epoch": 2.6254663422546636, | |
| "grad_norm": 0.5900049805641174, | |
| "learning_rate": 8.475652285532199e-07, | |
| "loss": 0.5129188299179077, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.62676399026764, | |
| "grad_norm": 0.5779396295547485, | |
| "learning_rate": 8.41813315750607e-07, | |
| "loss": 0.4839695394039154, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 2.6280616382806166, | |
| "grad_norm": 0.581840455532074, | |
| "learning_rate": 8.360801291217835e-07, | |
| "loss": 0.4942781925201416, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.629359286293593, | |
| "grad_norm": 0.5503793954849243, | |
| "learning_rate": 8.303656803896731e-07, | |
| "loss": 0.4754694700241089, | |
| "step": 2027 | |
| }, | |
| { | |
| "epoch": 2.630656934306569, | |
| "grad_norm": 0.5595881342887878, | |
| "learning_rate": 8.246699812388714e-07, | |
| "loss": 0.48087698221206665, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.631954582319546, | |
| "grad_norm": 0.5697108507156372, | |
| "learning_rate": 8.189930433156424e-07, | |
| "loss": 0.5032870173454285, | |
| "step": 2029 | |
| }, | |
| { | |
| "epoch": 2.6332522303325225, | |
| "grad_norm": 0.5761867761611938, | |
| "learning_rate": 8.133348782278916e-07, | |
| "loss": 0.5013032555580139, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.634549878345499, | |
| "grad_norm": 0.6058787703514099, | |
| "learning_rate": 8.07695497545129e-07, | |
| "loss": 0.44857025146484375, | |
| "step": 2031 | |
| }, | |
| { | |
| "epoch": 2.635847526358475, | |
| "grad_norm": 0.5961512327194214, | |
| "learning_rate": 8.020749127984629e-07, | |
| "loss": 0.5228594541549683, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.637145174371452, | |
| "grad_norm": 0.5766192078590393, | |
| "learning_rate": 7.964731354805677e-07, | |
| "loss": 0.4745315611362457, | |
| "step": 2033 | |
| }, | |
| { | |
| "epoch": 2.638442822384428, | |
| "grad_norm": 0.5896121859550476, | |
| "learning_rate": 7.908901770456579e-07, | |
| "loss": 0.519614577293396, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.639740470397405, | |
| "grad_norm": 0.5732361078262329, | |
| "learning_rate": 7.853260489094727e-07, | |
| "loss": 0.48370620608329773, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 2.641038118410381, | |
| "grad_norm": 0.5929004549980164, | |
| "learning_rate": 7.79780762449246e-07, | |
| "loss": 0.5153477191925049, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.6423357664233578, | |
| "grad_norm": 0.587020754814148, | |
| "learning_rate": 7.742543290036797e-07, | |
| "loss": 0.4829615652561188, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 2.643633414436334, | |
| "grad_norm": 0.5629860758781433, | |
| "learning_rate": 7.687467598729403e-07, | |
| "loss": 0.5223960876464844, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.6449310624493108, | |
| "grad_norm": 0.5553507208824158, | |
| "learning_rate": 7.63258066318604e-07, | |
| "loss": 0.4827447235584259, | |
| "step": 2039 | |
| }, | |
| { | |
| "epoch": 2.646228710462287, | |
| "grad_norm": 0.5940564274787903, | |
| "learning_rate": 7.577882595636665e-07, | |
| "loss": 0.538356602191925, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.6475263584752637, | |
| "grad_norm": 0.5712041258811951, | |
| "learning_rate": 7.523373507924947e-07, | |
| "loss": 0.48258891701698303, | |
| "step": 2041 | |
| }, | |
| { | |
| "epoch": 2.64882400648824, | |
| "grad_norm": 0.5664177536964417, | |
| "learning_rate": 7.469053511508184e-07, | |
| "loss": 0.4672595262527466, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.6501216545012163, | |
| "grad_norm": 0.6014147996902466, | |
| "learning_rate": 7.414922717457018e-07, | |
| "loss": 0.5549574494361877, | |
| "step": 2043 | |
| }, | |
| { | |
| "epoch": 2.651419302514193, | |
| "grad_norm": 0.588028073310852, | |
| "learning_rate": 7.360981236455222e-07, | |
| "loss": 0.5366802215576172, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.6527169505271697, | |
| "grad_norm": 0.5555592179298401, | |
| "learning_rate": 7.307229178799469e-07, | |
| "loss": 0.49787813425064087, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 2.654014598540146, | |
| "grad_norm": 0.5918477177619934, | |
| "learning_rate": 7.253666654399128e-07, | |
| "loss": 0.5271812081336975, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.6553122465531223, | |
| "grad_norm": 0.6544379591941833, | |
| "learning_rate": 7.200293772775968e-07, | |
| "loss": 0.5332372784614563, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 2.656609894566099, | |
| "grad_norm": 0.578555166721344, | |
| "learning_rate": 7.14711064306407e-07, | |
| "loss": 0.496245801448822, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.6579075425790757, | |
| "grad_norm": 0.5929746627807617, | |
| "learning_rate": 7.094117374009446e-07, | |
| "loss": 0.5187441110610962, | |
| "step": 2049 | |
| }, | |
| { | |
| "epoch": 2.659205190592052, | |
| "grad_norm": 0.5854722261428833, | |
| "learning_rate": 7.041314073969918e-07, | |
| "loss": 0.4945400655269623, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.6605028386050282, | |
| "grad_norm": 0.6011053323745728, | |
| "learning_rate": 6.988700850914876e-07, | |
| "loss": 0.48466387391090393, | |
| "step": 2051 | |
| }, | |
| { | |
| "epoch": 2.661800486618005, | |
| "grad_norm": 0.5774915814399719, | |
| "learning_rate": 6.93627781242504e-07, | |
| "loss": 0.5133316516876221, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.663098134630981, | |
| "grad_norm": 0.5776026248931885, | |
| "learning_rate": 6.884045065692257e-07, | |
| "loss": 0.5115536451339722, | |
| "step": 2053 | |
| }, | |
| { | |
| "epoch": 2.664395782643958, | |
| "grad_norm": 0.6011329293251038, | |
| "learning_rate": 6.83200271751927e-07, | |
| "loss": 0.5355618000030518, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.665693430656934, | |
| "grad_norm": 0.5973834991455078, | |
| "learning_rate": 6.780150874319524e-07, | |
| "loss": 0.5230112075805664, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 2.666991078669911, | |
| "grad_norm": 0.5917934775352478, | |
| "learning_rate": 6.72848964211692e-07, | |
| "loss": 0.5399461388587952, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.668288726682887, | |
| "grad_norm": 0.5736814141273499, | |
| "learning_rate": 6.677019126545548e-07, | |
| "loss": 0.49193501472473145, | |
| "step": 2057 | |
| }, | |
| { | |
| "epoch": 2.669586374695864, | |
| "grad_norm": 0.5814056396484375, | |
| "learning_rate": 6.625739432849643e-07, | |
| "loss": 0.5203338861465454, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.67088402270884, | |
| "grad_norm": 0.601714015007019, | |
| "learning_rate": 6.574650665883197e-07, | |
| "loss": 0.5449438095092773, | |
| "step": 2059 | |
| }, | |
| { | |
| "epoch": 2.672181670721817, | |
| "grad_norm": 0.5884926319122314, | |
| "learning_rate": 6.523752930109761e-07, | |
| "loss": 0.5138452053070068, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.673479318734793, | |
| "grad_norm": 0.5702131390571594, | |
| "learning_rate": 6.473046329602384e-07, | |
| "loss": 0.4545958638191223, | |
| "step": 2061 | |
| }, | |
| { | |
| "epoch": 2.6747769667477694, | |
| "grad_norm": 0.5839261412620544, | |
| "learning_rate": 6.422530968043173e-07, | |
| "loss": 0.5412476658821106, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.676074614760746, | |
| "grad_norm": 0.5880113244056702, | |
| "learning_rate": 6.372206948723292e-07, | |
| "loss": 0.5263261795043945, | |
| "step": 2063 | |
| }, | |
| { | |
| "epoch": 2.677372262773723, | |
| "grad_norm": 0.5763228535652161, | |
| "learning_rate": 6.322074374542608e-07, | |
| "loss": 0.5082492828369141, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.678669910786699, | |
| "grad_norm": 0.5878806710243225, | |
| "learning_rate": 6.272133348009546e-07, | |
| "loss": 0.5076773166656494, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 2.6799675587996754, | |
| "grad_norm": 0.5525650978088379, | |
| "learning_rate": 6.222383971240875e-07, | |
| "loss": 0.48154234886169434, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.681265206812652, | |
| "grad_norm": 0.6016013622283936, | |
| "learning_rate": 6.17282634596148e-07, | |
| "loss": 0.503459095954895, | |
| "step": 2067 | |
| }, | |
| { | |
| "epoch": 2.6825628548256284, | |
| "grad_norm": 0.6026131510734558, | |
| "learning_rate": 6.123460573504147e-07, | |
| "loss": 0.4821071922779083, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.683860502838605, | |
| "grad_norm": 0.5926850438117981, | |
| "learning_rate": 6.074286754809411e-07, | |
| "loss": 0.5161428451538086, | |
| "step": 2069 | |
| }, | |
| { | |
| "epoch": 2.6851581508515814, | |
| "grad_norm": 0.5853096842765808, | |
| "learning_rate": 6.025304990425241e-07, | |
| "loss": 0.5262787342071533, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.6851581508515814, | |
| "eval_loss": 0.6954009532928467, | |
| "eval_runtime": 72.3609, | |
| "eval_samples_per_second": 71.751, | |
| "eval_steps_per_second": 8.969, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.686455798864558, | |
| "grad_norm": 0.5976012945175171, | |
| "learning_rate": 5.976515380507008e-07, | |
| "loss": 0.5311732888221741, | |
| "step": 2071 | |
| }, | |
| { | |
| "epoch": 2.6877534468775344, | |
| "grad_norm": 0.5981724262237549, | |
| "learning_rate": 5.927918024817059e-07, | |
| "loss": 0.5703781247138977, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.689051094890511, | |
| "grad_norm": 0.5645772814750671, | |
| "learning_rate": 5.879513022724714e-07, | |
| "loss": 0.4812767505645752, | |
| "step": 2073 | |
| }, | |
| { | |
| "epoch": 2.6903487429034874, | |
| "grad_norm": 0.5886021852493286, | |
| "learning_rate": 5.831300473205948e-07, | |
| "loss": 0.5149608254432678, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.691646390916464, | |
| "grad_norm": 0.5895439982414246, | |
| "learning_rate": 5.783280474843222e-07, | |
| "loss": 0.5148745179176331, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 2.6929440389294403, | |
| "grad_norm": 0.571723461151123, | |
| "learning_rate": 5.735453125825275e-07, | |
| "loss": 0.5035296082496643, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.6942416869424166, | |
| "grad_norm": 0.6077845096588135, | |
| "learning_rate": 5.687818523946931e-07, | |
| "loss": 0.5260845422744751, | |
| "step": 2077 | |
| }, | |
| { | |
| "epoch": 2.6955393349553933, | |
| "grad_norm": 0.5872023105621338, | |
| "learning_rate": 5.640376766608902e-07, | |
| "loss": 0.49081629514694214, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.69683698296837, | |
| "grad_norm": 0.5637922286987305, | |
| "learning_rate": 5.593127950817579e-07, | |
| "loss": 0.49831029772758484, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 2.6981346309813463, | |
| "grad_norm": 0.588504433631897, | |
| "learning_rate": 5.546072173184791e-07, | |
| "loss": 0.5403261184692383, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.6994322789943226, | |
| "grad_norm": 0.5554431080818176, | |
| "learning_rate": 5.499209529927751e-07, | |
| "loss": 0.4801977872848511, | |
| "step": 2081 | |
| }, | |
| { | |
| "epoch": 2.7007299270072993, | |
| "grad_norm": 0.594923198223114, | |
| "learning_rate": 5.452540116868654e-07, | |
| "loss": 0.552370011806488, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.702027575020276, | |
| "grad_norm": 0.5900223851203918, | |
| "learning_rate": 5.406064029434666e-07, | |
| "loss": 0.5598849058151245, | |
| "step": 2083 | |
| }, | |
| { | |
| "epoch": 2.7033252230332523, | |
| "grad_norm": 0.5767436027526855, | |
| "learning_rate": 5.359781362657623e-07, | |
| "loss": 0.5048878192901611, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.7046228710462286, | |
| "grad_norm": 0.551128089427948, | |
| "learning_rate": 5.313692211173838e-07, | |
| "loss": 0.5155936479568481, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 2.7059205190592053, | |
| "grad_norm": 0.5880531072616577, | |
| "learning_rate": 5.26779666922399e-07, | |
| "loss": 0.5444161295890808, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.7072181670721815, | |
| "grad_norm": 0.5545855164527893, | |
| "learning_rate": 5.222094830652835e-07, | |
| "loss": 0.4949781894683838, | |
| "step": 2087 | |
| }, | |
| { | |
| "epoch": 2.7085158150851583, | |
| "grad_norm": 0.5254430174827576, | |
| "learning_rate": 5.176586788909066e-07, | |
| "loss": 0.48143208026885986, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.7098134630981345, | |
| "grad_norm": 0.5895472764968872, | |
| "learning_rate": 5.131272637045104e-07, | |
| "loss": 0.5467052459716797, | |
| "step": 2089 | |
| }, | |
| { | |
| "epoch": 2.7111111111111112, | |
| "grad_norm": 0.5603579878807068, | |
| "learning_rate": 5.086152467716932e-07, | |
| "loss": 0.48797622323036194, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.7124087591240875, | |
| "grad_norm": 0.5788029432296753, | |
| "learning_rate": 5.041226373183861e-07, | |
| "loss": 0.5119057297706604, | |
| "step": 2091 | |
| }, | |
| { | |
| "epoch": 2.7137064071370642, | |
| "grad_norm": 0.5590220093727112, | |
| "learning_rate": 4.996494445308409e-07, | |
| "loss": 0.46394574642181396, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.7150040551500405, | |
| "grad_norm": 0.5895569920539856, | |
| "learning_rate": 4.951956775556e-07, | |
| "loss": 0.4952976703643799, | |
| "step": 2093 | |
| }, | |
| { | |
| "epoch": 2.7163017031630172, | |
| "grad_norm": 0.5719903707504272, | |
| "learning_rate": 4.907613454994964e-07, | |
| "loss": 0.5015777349472046, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.7175993511759935, | |
| "grad_norm": 0.5849481821060181, | |
| "learning_rate": 4.863464574296106e-07, | |
| "loss": 0.5244485139846802, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 2.7188969991889698, | |
| "grad_norm": 0.5956225991249084, | |
| "learning_rate": 4.819510223732738e-07, | |
| "loss": 0.5492672324180603, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.7201946472019465, | |
| "grad_norm": 0.5836542844772339, | |
| "learning_rate": 4.775750493180386e-07, | |
| "loss": 0.48292914032936096, | |
| "step": 2097 | |
| }, | |
| { | |
| "epoch": 2.721492295214923, | |
| "grad_norm": 0.5966354012489319, | |
| "learning_rate": 4.7321854721166127e-07, | |
| "loss": 0.5208597183227539, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.7227899432278995, | |
| "grad_norm": 0.536894679069519, | |
| "learning_rate": 4.6888152496208593e-07, | |
| "loss": 0.4349246621131897, | |
| "step": 2099 | |
| }, | |
| { | |
| "epoch": 2.7240875912408757, | |
| "grad_norm": 0.589508593082428, | |
| "learning_rate": 4.645639914374278e-07, | |
| "loss": 0.5353684425354004, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.7253852392538525, | |
| "grad_norm": 0.5571612119674683, | |
| "learning_rate": 4.602659554659461e-07, | |
| "loss": 0.4614424705505371, | |
| "step": 2101 | |
| }, | |
| { | |
| "epoch": 2.7266828872668287, | |
| "grad_norm": 0.6046862602233887, | |
| "learning_rate": 4.559874258360408e-07, | |
| "loss": 0.5189507603645325, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.7279805352798054, | |
| "grad_norm": 0.5680896639823914, | |
| "learning_rate": 4.5172841129621726e-07, | |
| "loss": 0.5085829496383667, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 2.7292781832927817, | |
| "grad_norm": 0.5765218138694763, | |
| "learning_rate": 4.474889205550881e-07, | |
| "loss": 0.5140299797058105, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.7305758313057584, | |
| "grad_norm": 0.587651252746582, | |
| "learning_rate": 4.4326896228133354e-07, | |
| "loss": 0.4957928955554962, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 2.7318734793187347, | |
| "grad_norm": 0.5494794249534607, | |
| "learning_rate": 4.3906854510370245e-07, | |
| "loss": 0.5062738060951233, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.7331711273317114, | |
| "grad_norm": 0.5937455296516418, | |
| "learning_rate": 4.348876776109856e-07, | |
| "loss": 0.5094043016433716, | |
| "step": 2107 | |
| }, | |
| { | |
| "epoch": 2.7344687753446877, | |
| "grad_norm": 0.5641949772834778, | |
| "learning_rate": 4.307263683519969e-07, | |
| "loss": 0.48215553164482117, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.7357664233576644, | |
| "grad_norm": 0.5819230079650879, | |
| "learning_rate": 4.2658462583556216e-07, | |
| "loss": 0.5357835292816162, | |
| "step": 2109 | |
| }, | |
| { | |
| "epoch": 2.7370640713706407, | |
| "grad_norm": 0.5532712936401367, | |
| "learning_rate": 4.2246245853049706e-07, | |
| "loss": 0.47937077283859253, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.738361719383617, | |
| "grad_norm": 0.6110063195228577, | |
| "learning_rate": 4.1835987486558595e-07, | |
| "loss": 0.4744276702404022, | |
| "step": 2111 | |
| }, | |
| { | |
| "epoch": 2.7396593673965937, | |
| "grad_norm": 0.5573598146438599, | |
| "learning_rate": 4.142768832295807e-07, | |
| "loss": 0.5128625631332397, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.7409570154095704, | |
| "grad_norm": 0.5569184422492981, | |
| "learning_rate": 4.102134919711609e-07, | |
| "loss": 0.47407659888267517, | |
| "step": 2113 | |
| }, | |
| { | |
| "epoch": 2.7422546634225466, | |
| "grad_norm": 0.5868476629257202, | |
| "learning_rate": 4.061697093989347e-07, | |
| "loss": 0.5311683416366577, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.743552311435523, | |
| "grad_norm": 0.5694899559020996, | |
| "learning_rate": 4.021455437814148e-07, | |
| "loss": 0.4629291892051697, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 2.7448499594484996, | |
| "grad_norm": 0.5624482035636902, | |
| "learning_rate": 3.981410033469979e-07, | |
| "loss": 0.4855622351169586, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.7461476074614763, | |
| "grad_norm": 0.576919436454773, | |
| "learning_rate": 3.941560962839619e-07, | |
| "loss": 0.47935816645622253, | |
| "step": 2117 | |
| }, | |
| { | |
| "epoch": 2.7474452554744526, | |
| "grad_norm": 0.5966827869415283, | |
| "learning_rate": 3.9019083074042784e-07, | |
| "loss": 0.4561656415462494, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.748742903487429, | |
| "grad_norm": 0.5702851414680481, | |
| "learning_rate": 3.862452148243623e-07, | |
| "loss": 0.4796487092971802, | |
| "step": 2119 | |
| }, | |
| { | |
| "epoch": 2.7500405515004056, | |
| "grad_norm": 0.5755755305290222, | |
| "learning_rate": 3.823192566035494e-07, | |
| "loss": 0.5047421455383301, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.751338199513382, | |
| "grad_norm": 0.5769697427749634, | |
| "learning_rate": 3.7841296410558225e-07, | |
| "loss": 0.48532968759536743, | |
| "step": 2121 | |
| }, | |
| { | |
| "epoch": 2.7526358475263586, | |
| "grad_norm": 0.5873609781265259, | |
| "learning_rate": 3.7452634531783935e-07, | |
| "loss": 0.5122209787368774, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.753933495539335, | |
| "grad_norm": 0.5939727425575256, | |
| "learning_rate": 3.706594081874737e-07, | |
| "loss": 0.49794304370880127, | |
| "step": 2123 | |
| }, | |
| { | |
| "epoch": 2.7552311435523116, | |
| "grad_norm": 0.5834800601005554, | |
| "learning_rate": 3.6681216062138923e-07, | |
| "loss": 0.5340889096260071, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.756528791565288, | |
| "grad_norm": 0.576677680015564, | |
| "learning_rate": 3.6298461048623887e-07, | |
| "loss": 0.5236599445343018, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 2.757826439578264, | |
| "grad_norm": 0.5462478399276733, | |
| "learning_rate": 3.5917676560838775e-07, | |
| "loss": 0.47627806663513184, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.759124087591241, | |
| "grad_norm": 0.5982619524002075, | |
| "learning_rate": 3.5538863377392095e-07, | |
| "loss": 0.4933459460735321, | |
| "step": 2127 | |
| }, | |
| { | |
| "epoch": 2.7604217356042176, | |
| "grad_norm": 0.5802999138832092, | |
| "learning_rate": 3.5162022272860475e-07, | |
| "loss": 0.5381085872650146, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.761719383617194, | |
| "grad_norm": 0.5820630788803101, | |
| "learning_rate": 3.478715401778876e-07, | |
| "loss": 0.5177547931671143, | |
| "step": 2129 | |
| }, | |
| { | |
| "epoch": 2.76301703163017, | |
| "grad_norm": 0.6046480536460876, | |
| "learning_rate": 3.44142593786877e-07, | |
| "loss": 0.5715194940567017, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.764314679643147, | |
| "grad_norm": 0.5816249847412109, | |
| "learning_rate": 3.404333911803237e-07, | |
| "loss": 0.48858851194381714, | |
| "step": 2131 | |
| }, | |
| { | |
| "epoch": 2.7656123276561235, | |
| "grad_norm": 0.5709452629089355, | |
| "learning_rate": 3.367439399426087e-07, | |
| "loss": 0.5259594917297363, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.7669099756691, | |
| "grad_norm": 0.5610825419425964, | |
| "learning_rate": 3.330742476177273e-07, | |
| "loss": 0.49785754084587097, | |
| "step": 2133 | |
| }, | |
| { | |
| "epoch": 2.768207623682076, | |
| "grad_norm": 0.5751505494117737, | |
| "learning_rate": 3.2942432170926743e-07, | |
| "loss": 0.45043110847473145, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.769505271695053, | |
| "grad_norm": 0.5675750374794006, | |
| "learning_rate": 3.257941696804079e-07, | |
| "loss": 0.5171366930007935, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 2.770802919708029, | |
| "grad_norm": 0.5672844052314758, | |
| "learning_rate": 3.2218379895388896e-07, | |
| "loss": 0.467257022857666, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.7721005677210058, | |
| "grad_norm": 0.6082518696784973, | |
| "learning_rate": 3.185932169120043e-07, | |
| "loss": 0.5202172994613647, | |
| "step": 2137 | |
| }, | |
| { | |
| "epoch": 2.773398215733982, | |
| "grad_norm": 0.5631950497627258, | |
| "learning_rate": 3.150224308965866e-07, | |
| "loss": 0.5058823823928833, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.7746958637469588, | |
| "grad_norm": 0.6380532383918762, | |
| "learning_rate": 3.114714482089898e-07, | |
| "loss": 0.5831983089447021, | |
| "step": 2139 | |
| }, | |
| { | |
| "epoch": 2.775993511759935, | |
| "grad_norm": 0.5557391047477722, | |
| "learning_rate": 3.079402761100736e-07, | |
| "loss": 0.4567191004753113, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.7772911597729117, | |
| "grad_norm": 0.562920868396759, | |
| "learning_rate": 3.0442892182019236e-07, | |
| "loss": 0.4184800386428833, | |
| "step": 2141 | |
| }, | |
| { | |
| "epoch": 2.778588807785888, | |
| "grad_norm": 0.63033127784729, | |
| "learning_rate": 3.00937392519175e-07, | |
| "loss": 0.5374839901924133, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.7798864557988647, | |
| "grad_norm": 0.5735025405883789, | |
| "learning_rate": 2.974656953463173e-07, | |
| "loss": 0.4503205716609955, | |
| "step": 2143 | |
| }, | |
| { | |
| "epoch": 2.781184103811841, | |
| "grad_norm": 0.6051810383796692, | |
| "learning_rate": 2.9401383740035983e-07, | |
| "loss": 0.4981985092163086, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.7824817518248173, | |
| "grad_norm": 0.6038339734077454, | |
| "learning_rate": 2.905818257394799e-07, | |
| "loss": 0.5327208638191223, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 2.783779399837794, | |
| "grad_norm": 0.5686031579971313, | |
| "learning_rate": 2.871696673812718e-07, | |
| "loss": 0.4990962743759155, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.7850770478507707, | |
| "grad_norm": 0.5870386958122253, | |
| "learning_rate": 2.837773693027346e-07, | |
| "loss": 0.5274587869644165, | |
| "step": 2147 | |
| }, | |
| { | |
| "epoch": 2.786374695863747, | |
| "grad_norm": 0.6039890050888062, | |
| "learning_rate": 2.8040493844026185e-07, | |
| "loss": 0.4969175457954407, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.7876723438767232, | |
| "grad_norm": 0.5605257749557495, | |
| "learning_rate": 2.7705238168961867e-07, | |
| "loss": 0.466129869222641, | |
| "step": 2149 | |
| }, | |
| { | |
| "epoch": 2.7889699918897, | |
| "grad_norm": 0.5661087036132812, | |
| "learning_rate": 2.7371970590593597e-07, | |
| "loss": 0.5182359218597412, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.7902676399026762, | |
| "grad_norm": 0.6032746434211731, | |
| "learning_rate": 2.7040691790369165e-07, | |
| "loss": 0.4847348928451538, | |
| "step": 2151 | |
| }, | |
| { | |
| "epoch": 2.791565287915653, | |
| "grad_norm": 0.5873638987541199, | |
| "learning_rate": 2.671140244567005e-07, | |
| "loss": 0.4982571005821228, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.792862935928629, | |
| "grad_norm": 0.5877160429954529, | |
| "learning_rate": 2.6384103229809445e-07, | |
| "loss": 0.47337985038757324, | |
| "step": 2153 | |
| }, | |
| { | |
| "epoch": 2.794160583941606, | |
| "grad_norm": 0.6034269332885742, | |
| "learning_rate": 2.605879481203144e-07, | |
| "loss": 0.5359882116317749, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.795458231954582, | |
| "grad_norm": 0.5855337381362915, | |
| "learning_rate": 2.5735477857509406e-07, | |
| "loss": 0.48935824632644653, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 2.796755879967559, | |
| "grad_norm": 0.5761221647262573, | |
| "learning_rate": 2.5414153027344846e-07, | |
| "loss": 0.5092116594314575, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.798053527980535, | |
| "grad_norm": 0.5906012654304504, | |
| "learning_rate": 2.5094820978565416e-07, | |
| "loss": 0.4823336601257324, | |
| "step": 2157 | |
| }, | |
| { | |
| "epoch": 2.799351175993512, | |
| "grad_norm": 0.5929545164108276, | |
| "learning_rate": 2.4777482364124695e-07, | |
| "loss": 0.48247990012168884, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.800648824006488, | |
| "grad_norm": 0.5614597797393799, | |
| "learning_rate": 2.446213783289941e-07, | |
| "loss": 0.48732107877731323, | |
| "step": 2159 | |
| }, | |
| { | |
| "epoch": 2.8019464720194645, | |
| "grad_norm": 0.6198487281799316, | |
| "learning_rate": 2.4148788029689565e-07, | |
| "loss": 0.544142484664917, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.803244120032441, | |
| "grad_norm": 0.5842984318733215, | |
| "learning_rate": 2.3837433595216174e-07, | |
| "loss": 0.5269244313240051, | |
| "step": 2161 | |
| }, | |
| { | |
| "epoch": 2.804541768045418, | |
| "grad_norm": 0.5822996497154236, | |
| "learning_rate": 2.3528075166120323e-07, | |
| "loss": 0.49836334586143494, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.805839416058394, | |
| "grad_norm": 0.5670111775398254, | |
| "learning_rate": 2.3220713374961457e-07, | |
| "loss": 0.5108374357223511, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 2.8071370640713704, | |
| "grad_norm": 0.5872285962104797, | |
| "learning_rate": 2.2915348850216955e-07, | |
| "loss": 0.49880123138427734, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.808434712084347, | |
| "grad_norm": 0.5544793605804443, | |
| "learning_rate": 2.2611982216279693e-07, | |
| "loss": 0.5181583166122437, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 2.809732360097324, | |
| "grad_norm": 0.5830904245376587, | |
| "learning_rate": 2.2310614093457917e-07, | |
| "loss": 0.48121365904808044, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.8110300081103, | |
| "grad_norm": 0.6001294255256653, | |
| "learning_rate": 2.2011245097972812e-07, | |
| "loss": 0.500962495803833, | |
| "step": 2167 | |
| }, | |
| { | |
| "epoch": 2.8123276561232764, | |
| "grad_norm": 0.6160042881965637, | |
| "learning_rate": 2.171387584195861e-07, | |
| "loss": 0.5166311264038086, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.813625304136253, | |
| "grad_norm": 0.5664080381393433, | |
| "learning_rate": 2.1418506933459926e-07, | |
| "loss": 0.4849929213523865, | |
| "step": 2169 | |
| }, | |
| { | |
| "epoch": 2.8149229521492294, | |
| "grad_norm": 0.60596764087677, | |
| "learning_rate": 2.1125138976431425e-07, | |
| "loss": 0.5384441018104553, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.816220600162206, | |
| "grad_norm": 0.6017642617225647, | |
| "learning_rate": 2.0833772570736376e-07, | |
| "loss": 0.5182196497917175, | |
| "step": 2171 | |
| }, | |
| { | |
| "epoch": 2.8175182481751824, | |
| "grad_norm": 0.567242443561554, | |
| "learning_rate": 2.0544408312145325e-07, | |
| "loss": 0.5023871660232544, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.818815896188159, | |
| "grad_norm": 0.5743298530578613, | |
| "learning_rate": 2.025704679233498e-07, | |
| "loss": 0.4737445116043091, | |
| "step": 2173 | |
| }, | |
| { | |
| "epoch": 2.8201135442011354, | |
| "grad_norm": 0.5686278343200684, | |
| "learning_rate": 1.9971688598886874e-07, | |
| "loss": 0.4916064441204071, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.821411192214112, | |
| "grad_norm": 0.5849027037620544, | |
| "learning_rate": 1.9688334315286383e-07, | |
| "loss": 0.5161796808242798, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 2.8227088402270883, | |
| "grad_norm": 0.5709643959999084, | |
| "learning_rate": 1.9406984520921156e-07, | |
| "loss": 0.5027370452880859, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.824006488240065, | |
| "grad_norm": 0.6077797412872314, | |
| "learning_rate": 1.9127639791080345e-07, | |
| "loss": 0.561673641204834, | |
| "step": 2177 | |
| }, | |
| { | |
| "epoch": 2.8253041362530413, | |
| "grad_norm": 0.5836532711982727, | |
| "learning_rate": 1.885030069695326e-07, | |
| "loss": 0.5252400636672974, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.8266017842660176, | |
| "grad_norm": 0.5875435471534729, | |
| "learning_rate": 1.8574967805628174e-07, | |
| "loss": 0.5136289596557617, | |
| "step": 2179 | |
| }, | |
| { | |
| "epoch": 2.8278994322789943, | |
| "grad_norm": 0.5999600291252136, | |
| "learning_rate": 1.8301641680090965e-07, | |
| "loss": 0.5113690495491028, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.829197080291971, | |
| "grad_norm": 0.5720099210739136, | |
| "learning_rate": 1.8030322879224792e-07, | |
| "loss": 0.5277208089828491, | |
| "step": 2181 | |
| }, | |
| { | |
| "epoch": 2.8304947283049473, | |
| "grad_norm": 0.5587209463119507, | |
| "learning_rate": 1.7761011957807439e-07, | |
| "loss": 0.5302145481109619, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.8317923763179236, | |
| "grad_norm": 0.574344277381897, | |
| "learning_rate": 1.7493709466511965e-07, | |
| "loss": 0.5009472370147705, | |
| "step": 2183 | |
| }, | |
| { | |
| "epoch": 2.8330900243309003, | |
| "grad_norm": 0.5876274704933167, | |
| "learning_rate": 1.7228415951904165e-07, | |
| "loss": 0.49587976932525635, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.8343876723438766, | |
| "grad_norm": 0.5799663662910461, | |
| "learning_rate": 1.6965131956442004e-07, | |
| "loss": 0.5200576782226562, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 2.8356853203568533, | |
| "grad_norm": 0.5789362192153931, | |
| "learning_rate": 1.670385801847485e-07, | |
| "loss": 0.4996534585952759, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.8369829683698295, | |
| "grad_norm": 0.5791637897491455, | |
| "learning_rate": 1.6444594672241688e-07, | |
| "loss": 0.5251076221466064, | |
| "step": 2187 | |
| }, | |
| { | |
| "epoch": 2.8382806163828063, | |
| "grad_norm": 0.581289529800415, | |
| "learning_rate": 1.6187342447870235e-07, | |
| "loss": 0.47298407554626465, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.8395782643957825, | |
| "grad_norm": 0.5624388456344604, | |
| "learning_rate": 1.5932101871376503e-07, | |
| "loss": 0.48804956674575806, | |
| "step": 2189 | |
| }, | |
| { | |
| "epoch": 2.8408759124087593, | |
| "grad_norm": 0.5740110278129578, | |
| "learning_rate": 1.567887346466257e-07, | |
| "loss": 0.4583921730518341, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.8421735604217355, | |
| "grad_norm": 0.5799588561058044, | |
| "learning_rate": 1.54276577455168e-07, | |
| "loss": 0.5046111345291138, | |
| "step": 2191 | |
| }, | |
| { | |
| "epoch": 2.8434712084347122, | |
| "grad_norm": 0.5686801671981812, | |
| "learning_rate": 1.517845522761141e-07, | |
| "loss": 0.5424494743347168, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.8447688564476885, | |
| "grad_norm": 0.5737746953964233, | |
| "learning_rate": 1.4931266420502687e-07, | |
| "loss": 0.5258438587188721, | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 2.846066504460665, | |
| "grad_norm": 0.5844926238059998, | |
| "learning_rate": 1.468609182962899e-07, | |
| "loss": 0.5294222831726074, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.8473641524736415, | |
| "grad_norm": 0.6161758899688721, | |
| "learning_rate": 1.4442931956310525e-07, | |
| "loss": 0.48813527822494507, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 2.848661800486618, | |
| "grad_norm": 0.5877721905708313, | |
| "learning_rate": 1.420178729774746e-07, | |
| "loss": 0.5104416608810425, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.8499594484995945, | |
| "grad_norm": 0.607412576675415, | |
| "learning_rate": 1.3962658347019819e-07, | |
| "loss": 0.5552476644515991, | |
| "step": 2197 | |
| }, | |
| { | |
| "epoch": 2.8512570965125708, | |
| "grad_norm": 0.5500598549842834, | |
| "learning_rate": 1.372554559308559e-07, | |
| "loss": 0.5361748933792114, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.8525547445255475, | |
| "grad_norm": 0.5887991786003113, | |
| "learning_rate": 1.3490449520780492e-07, | |
| "loss": 0.5089778304100037, | |
| "step": 2199 | |
| }, | |
| { | |
| "epoch": 2.853852392538524, | |
| "grad_norm": 0.5767118334770203, | |
| "learning_rate": 1.3257370610816333e-07, | |
| "loss": 0.4646577537059784, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.8551500405515005, | |
| "grad_norm": 0.5947672128677368, | |
| "learning_rate": 1.3026309339780442e-07, | |
| "loss": 0.45190826058387756, | |
| "step": 2201 | |
| }, | |
| { | |
| "epoch": 2.8564476885644767, | |
| "grad_norm": 0.576164722442627, | |
| "learning_rate": 1.2797266180134994e-07, | |
| "loss": 0.47920286655426025, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.8577453365774534, | |
| "grad_norm": 0.5928218364715576, | |
| "learning_rate": 1.2570241600214805e-07, | |
| "loss": 0.4952476918697357, | |
| "step": 2203 | |
| }, | |
| { | |
| "epoch": 2.8590429845904297, | |
| "grad_norm": 0.5796513557434082, | |
| "learning_rate": 1.2345236064228216e-07, | |
| "loss": 0.4798247218132019, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.8603406326034064, | |
| "grad_norm": 0.6173388361930847, | |
| "learning_rate": 1.212225003225409e-07, | |
| "loss": 0.5353522300720215, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 2.8616382806163827, | |
| "grad_norm": 0.582225501537323, | |
| "learning_rate": 1.1901283960242704e-07, | |
| "loss": 0.4966939091682434, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.8629359286293594, | |
| "grad_norm": 0.573807954788208, | |
| "learning_rate": 1.168233830001364e-07, | |
| "loss": 0.5133891701698303, | |
| "step": 2207 | |
| }, | |
| { | |
| "epoch": 2.8642335766423357, | |
| "grad_norm": 0.5719092488288879, | |
| "learning_rate": 1.1465413499255452e-07, | |
| "loss": 0.5084906816482544, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.8655312246553124, | |
| "grad_norm": 0.563827395439148, | |
| "learning_rate": 1.1250510001524329e-07, | |
| "loss": 0.551742434501648, | |
| "step": 2209 | |
| }, | |
| { | |
| "epoch": 2.8668288726682887, | |
| "grad_norm": 0.5915552973747253, | |
| "learning_rate": 1.103762824624377e-07, | |
| "loss": 0.5108176469802856, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.8681265206812654, | |
| "grad_norm": 0.5619785189628601, | |
| "learning_rate": 1.0826768668702691e-07, | |
| "loss": 0.5008025169372559, | |
| "step": 2211 | |
| }, | |
| { | |
| "epoch": 2.8694241686942417, | |
| "grad_norm": 0.5829325914382935, | |
| "learning_rate": 1.0617931700055984e-07, | |
| "loss": 0.5187573432922363, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.870721816707218, | |
| "grad_norm": 0.6110272407531738, | |
| "learning_rate": 1.0411117767322065e-07, | |
| "loss": 0.5479835271835327, | |
| "step": 2213 | |
| }, | |
| { | |
| "epoch": 2.8720194647201946, | |
| "grad_norm": 0.5755971074104309, | |
| "learning_rate": 1.0206327293383222e-07, | |
| "loss": 0.5030970573425293, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.8733171127331714, | |
| "grad_norm": 0.5851888060569763, | |
| "learning_rate": 1.000356069698416e-07, | |
| "loss": 0.5171909928321838, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 2.8746147607461476, | |
| "grad_norm": 0.558315098285675, | |
| "learning_rate": 9.802818392731117e-08, | |
| "loss": 0.47078371047973633, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.875912408759124, | |
| "grad_norm": 0.6229851841926575, | |
| "learning_rate": 9.60410079109153e-08, | |
| "loss": 0.5632795095443726, | |
| "step": 2217 | |
| }, | |
| { | |
| "epoch": 2.8772100567721006, | |
| "grad_norm": 0.5876999497413635, | |
| "learning_rate": 9.407408298392373e-08, | |
| "loss": 0.5133551359176636, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.878507704785077, | |
| "grad_norm": 0.5872880220413208, | |
| "learning_rate": 9.212741316820039e-08, | |
| "loss": 0.4713757038116455, | |
| "step": 2219 | |
| }, | |
| { | |
| "epoch": 2.8798053527980536, | |
| "grad_norm": 0.5895143747329712, | |
| "learning_rate": 9.020100244419461e-08, | |
| "loss": 0.5900079607963562, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.88110300081103, | |
| "grad_norm": 0.5657681822776794, | |
| "learning_rate": 8.829485475092548e-08, | |
| "loss": 0.5136827230453491, | |
| "step": 2221 | |
| }, | |
| { | |
| "epoch": 2.8824006488240066, | |
| "grad_norm": 0.8106376528739929, | |
| "learning_rate": 8.640897398598525e-08, | |
| "loss": 0.6291136741638184, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.883698296836983, | |
| "grad_norm": 0.5875924825668335, | |
| "learning_rate": 8.454336400552154e-08, | |
| "loss": 0.4933609962463379, | |
| "step": 2223 | |
| }, | |
| { | |
| "epoch": 2.8849959448499596, | |
| "grad_norm": 0.5977309346199036, | |
| "learning_rate": 8.269802862423405e-08, | |
| "loss": 0.5197732448577881, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.886293592862936, | |
| "grad_norm": 0.5707021951675415, | |
| "learning_rate": 8.087297161536778e-08, | |
| "loss": 0.5037369132041931, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 2.8875912408759126, | |
| "grad_norm": 0.5633382797241211, | |
| "learning_rate": 7.906819671070098e-08, | |
| "loss": 0.4686581492424011, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 0.5665260553359985, | |
| "learning_rate": 7.728370760054283e-08, | |
| "loss": 0.4968178868293762, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 2.890186536901865, | |
| "grad_norm": 0.557956874370575, | |
| "learning_rate": 7.55195079337212e-08, | |
| "loss": 0.4842921793460846, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.891484184914842, | |
| "grad_norm": 0.5774162411689758, | |
| "learning_rate": 7.377560131757832e-08, | |
| "loss": 0.48150286078453064, | |
| "step": 2229 | |
| }, | |
| { | |
| "epoch": 2.8927818329278185, | |
| "grad_norm": 0.5605522990226746, | |
| "learning_rate": 7.205199131796182e-08, | |
| "loss": 0.47593769431114197, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.894079480940795, | |
| "grad_norm": 0.5713371634483337, | |
| "learning_rate": 7.034868145921802e-08, | |
| "loss": 0.5388371348381042, | |
| "step": 2231 | |
| }, | |
| { | |
| "epoch": 2.895377128953771, | |
| "grad_norm": 0.566564679145813, | |
| "learning_rate": 6.866567522418322e-08, | |
| "loss": 0.5253296494483948, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.896674776966748, | |
| "grad_norm": 0.6038841605186462, | |
| "learning_rate": 6.700297605418127e-08, | |
| "loss": 0.4850519895553589, | |
| "step": 2233 | |
| }, | |
| { | |
| "epoch": 2.8979724249797245, | |
| "grad_norm": 0.5850130915641785, | |
| "learning_rate": 6.53605873490093e-08, | |
| "loss": 0.526265025138855, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.899270072992701, | |
| "grad_norm": 0.5685164332389832, | |
| "learning_rate": 6.373851246693763e-08, | |
| "loss": 0.49016064405441284, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 2.900567721005677, | |
| "grad_norm": 0.585509717464447, | |
| "learning_rate": 6.21367547246976e-08, | |
| "loss": 0.49361756443977356, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.9018653690186538, | |
| "grad_norm": 0.5846717357635498, | |
| "learning_rate": 6.055531739747933e-08, | |
| "loss": 0.5073826313018799, | |
| "step": 2237 | |
| }, | |
| { | |
| "epoch": 2.90316301703163, | |
| "grad_norm": 0.6035211682319641, | |
| "learning_rate": 5.899420371892173e-08, | |
| "loss": 0.4748195707798004, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.9044606650446068, | |
| "grad_norm": 0.5725396275520325, | |
| "learning_rate": 5.745341688110806e-08, | |
| "loss": 0.49574536085128784, | |
| "step": 2239 | |
| }, | |
| { | |
| "epoch": 2.905758313057583, | |
| "grad_norm": 0.5700922012329102, | |
| "learning_rate": 5.593296003455595e-08, | |
| "loss": 0.4746463894844055, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.9070559610705597, | |
| "grad_norm": 0.5627117156982422, | |
| "learning_rate": 5.4432836288215165e-08, | |
| "loss": 0.512833833694458, | |
| "step": 2241 | |
| }, | |
| { | |
| "epoch": 2.908353609083536, | |
| "grad_norm": 0.5812812447547913, | |
| "learning_rate": 5.2953048709459834e-08, | |
| "loss": 0.48332545161247253, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.9096512570965127, | |
| "grad_norm": 0.5835334062576294, | |
| "learning_rate": 5.1493600324080684e-08, | |
| "loss": 0.507304847240448, | |
| "step": 2243 | |
| }, | |
| { | |
| "epoch": 2.910948905109489, | |
| "grad_norm": 0.5789167284965515, | |
| "learning_rate": 5.0054494116279497e-08, | |
| "loss": 0.5132785439491272, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.9122465531224657, | |
| "grad_norm": 0.5582759976387024, | |
| "learning_rate": 4.8635733028664644e-08, | |
| "loss": 0.4791605472564697, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 2.913544201135442, | |
| "grad_norm": 0.5968536138534546, | |
| "learning_rate": 4.723731996224446e-08, | |
| "loss": 0.5294557809829712, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.9148418491484183, | |
| "grad_norm": 0.5799421072006226, | |
| "learning_rate": 4.585925777641831e-08, | |
| "loss": 0.5392569303512573, | |
| "step": 2247 | |
| }, | |
| { | |
| "epoch": 2.916139497161395, | |
| "grad_norm": 0.5876581072807312, | |
| "learning_rate": 4.450154928897443e-08, | |
| "loss": 0.5044458508491516, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.9174371451743717, | |
| "grad_norm": 0.5795705914497375, | |
| "learning_rate": 4.316419727608434e-08, | |
| "loss": 0.518474280834198, | |
| "step": 2249 | |
| }, | |
| { | |
| "epoch": 2.918734793187348, | |
| "grad_norm": 0.5783658027648926, | |
| "learning_rate": 4.1847204472293954e-08, | |
| "loss": 0.5036035180091858, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.9200324412003242, | |
| "grad_norm": 0.5799797773361206, | |
| "learning_rate": 4.055057357052139e-08, | |
| "loss": 0.5075333118438721, | |
| "step": 2251 | |
| }, | |
| { | |
| "epoch": 2.921330089213301, | |
| "grad_norm": 0.5816603899002075, | |
| "learning_rate": 3.927430722204473e-08, | |
| "loss": 0.49955567717552185, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.9226277372262772, | |
| "grad_norm": 0.5603087544441223, | |
| "learning_rate": 3.801840803651091e-08, | |
| "loss": 0.4799802005290985, | |
| "step": 2253 | |
| }, | |
| { | |
| "epoch": 2.923925385239254, | |
| "grad_norm": 0.5984447598457336, | |
| "learning_rate": 3.678287858191132e-08, | |
| "loss": 0.4863054156303406, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.92522303325223, | |
| "grad_norm": 0.5684608817100525, | |
| "learning_rate": 3.5567721384593965e-08, | |
| "loss": 0.5202617645263672, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 2.926520681265207, | |
| "grad_norm": 0.6067941784858704, | |
| "learning_rate": 3.437293892924576e-08, | |
| "loss": 0.5111681818962097, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.927818329278183, | |
| "grad_norm": 0.6141681671142578, | |
| "learning_rate": 3.3198533658895804e-08, | |
| "loss": 0.5316765904426575, | |
| "step": 2257 | |
| }, | |
| { | |
| "epoch": 2.92911597729116, | |
| "grad_norm": 0.5799176096916199, | |
| "learning_rate": 3.2044507974905433e-08, | |
| "loss": 0.46131962537765503, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.930413625304136, | |
| "grad_norm": 0.5954794883728027, | |
| "learning_rate": 3.091086423696377e-08, | |
| "loss": 0.520176887512207, | |
| "step": 2259 | |
| }, | |
| { | |
| "epoch": 2.931711273317113, | |
| "grad_norm": 0.5652449131011963, | |
| "learning_rate": 2.9797604763087684e-08, | |
| "loss": 0.5085136890411377, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.933008921330089, | |
| "grad_norm": 0.5852287411689758, | |
| "learning_rate": 2.8704731829609643e-08, | |
| "loss": 0.5083173513412476, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 2.9343065693430654, | |
| "grad_norm": 0.5846629738807678, | |
| "learning_rate": 2.763224767117767e-08, | |
| "loss": 0.5292702913284302, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.935604217356042, | |
| "grad_norm": 0.5861793756484985, | |
| "learning_rate": 2.6580154480750907e-08, | |
| "loss": 0.5053665637969971, | |
| "step": 2263 | |
| }, | |
| { | |
| "epoch": 2.936901865369019, | |
| "grad_norm": 0.5602736473083496, | |
| "learning_rate": 2.554845440959408e-08, | |
| "loss": 0.5189537405967712, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.938199513381995, | |
| "grad_norm": 0.5991557240486145, | |
| "learning_rate": 2.4537149567271935e-08, | |
| "loss": 0.5867321491241455, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 2.9394971613949714, | |
| "grad_norm": 0.5465215444564819, | |
| "learning_rate": 2.3546242021648126e-08, | |
| "loss": 0.5084092617034912, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.940794809407948, | |
| "grad_norm": 0.6008067727088928, | |
| "learning_rate": 2.2575733798876342e-08, | |
| "loss": 0.5280360579490662, | |
| "step": 2267 | |
| }, | |
| { | |
| "epoch": 2.942092457420925, | |
| "grad_norm": 0.5549503564834595, | |
| "learning_rate": 2.162562688340142e-08, | |
| "loss": 0.4592389762401581, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.943390105433901, | |
| "grad_norm": 0.600985586643219, | |
| "learning_rate": 2.0695923217950442e-08, | |
| "loss": 0.5138071179389954, | |
| "step": 2269 | |
| }, | |
| { | |
| "epoch": 2.9446877534468774, | |
| "grad_norm": 0.5776973366737366, | |
| "learning_rate": 1.9786624703532764e-08, | |
| "loss": 0.560516357421875, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.945985401459854, | |
| "grad_norm": 0.5803866982460022, | |
| "learning_rate": 1.8897733199434443e-08, | |
| "loss": 0.48770207166671753, | |
| "step": 2271 | |
| }, | |
| { | |
| "epoch": 2.9472830494728304, | |
| "grad_norm": 0.5844945907592773, | |
| "learning_rate": 1.8029250523211582e-08, | |
| "loss": 0.5004736185073853, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.948580697485807, | |
| "grad_norm": 0.5826125144958496, | |
| "learning_rate": 1.718117845069367e-08, | |
| "loss": 0.4950000047683716, | |
| "step": 2273 | |
| }, | |
| { | |
| "epoch": 2.9498783454987834, | |
| "grad_norm": 0.5776214003562927, | |
| "learning_rate": 1.635351871597246e-08, | |
| "loss": 0.5560945868492126, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.95117599351176, | |
| "grad_norm": 0.565700352191925, | |
| "learning_rate": 1.554627301140199e-08, | |
| "loss": 0.4630610942840576, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 2.9524736415247363, | |
| "grad_norm": 0.5994547605514526, | |
| "learning_rate": 1.4759442987596351e-08, | |
| "loss": 0.5141358375549316, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.9537712895377126, | |
| "grad_norm": 0.573093831539154, | |
| "learning_rate": 1.3993030253423023e-08, | |
| "loss": 0.4815256893634796, | |
| "step": 2277 | |
| }, | |
| { | |
| "epoch": 2.9550689375506893, | |
| "grad_norm": 0.5978487730026245, | |
| "learning_rate": 1.3247036376002886e-08, | |
| "loss": 0.5149579048156738, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.956366585563666, | |
| "grad_norm": 0.6069895625114441, | |
| "learning_rate": 1.252146288070355e-08, | |
| "loss": 0.5201846361160278, | |
| "step": 2279 | |
| }, | |
| { | |
| "epoch": 2.9576642335766423, | |
| "grad_norm": 0.5879092216491699, | |
| "learning_rate": 1.1816311251140466e-08, | |
| "loss": 0.5039907693862915, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.9589618815896186, | |
| "grad_norm": 0.5550662875175476, | |
| "learning_rate": 1.113158292916916e-08, | |
| "loss": 0.5198723077774048, | |
| "step": 2281 | |
| }, | |
| { | |
| "epoch": 2.9602595296025953, | |
| "grad_norm": 0.5664054155349731, | |
| "learning_rate": 1.0467279314886336e-08, | |
| "loss": 0.5281890630722046, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.961557177615572, | |
| "grad_norm": 0.5738133788108826, | |
| "learning_rate": 9.82340176662433e-09, | |
| "loss": 0.47895991802215576, | |
| "step": 2283 | |
| }, | |
| { | |
| "epoch": 2.9628548256285483, | |
| "grad_norm": 0.5834701657295227, | |
| "learning_rate": 9.199951600951106e-09, | |
| "loss": 0.49841928482055664, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.9641524736415246, | |
| "grad_norm": 0.553411602973938, | |
| "learning_rate": 8.596930092662493e-09, | |
| "loss": 0.5044345855712891, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 2.9654501216545013, | |
| "grad_norm": 0.5765789151191711, | |
| "learning_rate": 8.014338474785499e-09, | |
| "loss": 0.45714667439460754, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.9667477696674776, | |
| "grad_norm": 0.5678233504295349, | |
| "learning_rate": 7.45217793857389e-09, | |
| "loss": 0.5142921209335327, | |
| "step": 2287 | |
| }, | |
| { | |
| "epoch": 2.9680454176804543, | |
| "grad_norm": 0.5809730887413025, | |
| "learning_rate": 6.910449633501515e-09, | |
| "loss": 0.5097491145133972, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.9693430656934305, | |
| "grad_norm": 0.863067626953125, | |
| "learning_rate": 6.389154667266751e-09, | |
| "loss": 0.49733829498291016, | |
| "step": 2289 | |
| }, | |
| { | |
| "epoch": 2.9706407137064073, | |
| "grad_norm": 0.5724239349365234, | |
| "learning_rate": 5.888294105785841e-09, | |
| "loss": 0.5271996855735779, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.9719383617193835, | |
| "grad_norm": 0.5894045829772949, | |
| "learning_rate": 5.407868973191788e-09, | |
| "loss": 0.5507649183273315, | |
| "step": 2291 | |
| }, | |
| { | |
| "epoch": 2.9732360097323602, | |
| "grad_norm": 0.5670002698898315, | |
| "learning_rate": 4.947880251832127e-09, | |
| "loss": 0.5069165229797363, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.9745336577453365, | |
| "grad_norm": 0.6079567074775696, | |
| "learning_rate": 4.508328882268931e-09, | |
| "loss": 0.5027692317962646, | |
| "step": 2293 | |
| }, | |
| { | |
| "epoch": 2.9758313057583132, | |
| "grad_norm": 0.5965436697006226, | |
| "learning_rate": 4.089215763271037e-09, | |
| "loss": 0.4549415707588196, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.9771289537712895, | |
| "grad_norm": 0.5540100336074829, | |
| "learning_rate": 3.6905417518195985e-09, | |
| "loss": 0.5082988739013672, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 2.9784266017842658, | |
| "grad_norm": 0.5584218502044678, | |
| "learning_rate": 3.312307663103642e-09, | |
| "loss": 0.49896612763404846, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.9797242497972425, | |
| "grad_norm": 0.5825123190879822, | |
| "learning_rate": 2.954514270513409e-09, | |
| "loss": 0.5268645286560059, | |
| "step": 2297 | |
| }, | |
| { | |
| "epoch": 2.981021897810219, | |
| "grad_norm": 0.6069872379302979, | |
| "learning_rate": 2.6171623056481245e-09, | |
| "loss": 0.5306706428527832, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.9823195458231955, | |
| "grad_norm": 0.619730532169342, | |
| "learning_rate": 2.300252458306007e-09, | |
| "loss": 0.5466433167457581, | |
| "step": 2299 | |
| }, | |
| { | |
| "epoch": 2.9836171938361717, | |
| "grad_norm": 0.575143039226532, | |
| "learning_rate": 2.0037853764887096e-09, | |
| "loss": 0.5247520804405212, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.9836171938361717, | |
| "eval_loss": 0.6951664686203003, | |
| "eval_runtime": 72.3726, | |
| "eval_samples_per_second": 71.74, | |
| "eval_steps_per_second": 8.967, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.9849148418491485, | |
| "grad_norm": 0.5698785781860352, | |
| "learning_rate": 1.7277616663946562e-09, | |
| "loss": 0.5104506015777588, | |
| "step": 2301 | |
| }, | |
| { | |
| "epoch": 2.986212489862125, | |
| "grad_norm": 0.5820271372795105, | |
| "learning_rate": 1.4721818924223752e-09, | |
| "loss": 0.5188534259796143, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.9875101378751014, | |
| "grad_norm": 0.5771408081054688, | |
| "learning_rate": 1.2370465771693874e-09, | |
| "loss": 0.5191137194633484, | |
| "step": 2303 | |
| }, | |
| { | |
| "epoch": 2.9888077858880777, | |
| "grad_norm": 0.555460512638092, | |
| "learning_rate": 1.0223562014277654e-09, | |
| "loss": 0.4951835870742798, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.9901054339010544, | |
| "grad_norm": 0.602135956287384, | |
| "learning_rate": 8.281112041841343e-10, | |
| "loss": 0.5143213272094727, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 2.9914030819140307, | |
| "grad_norm": 0.5755578875541687, | |
| "learning_rate": 6.543119826207811e-10, | |
| "loss": 0.5067423582077026, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.9927007299270074, | |
| "grad_norm": 0.585641622543335, | |
| "learning_rate": 5.009588921123243e-10, | |
| "loss": 0.49582135677337646, | |
| "step": 2307 | |
| }, | |
| { | |
| "epoch": 2.9939983779399837, | |
| "grad_norm": 0.5883374214172363, | |
| "learning_rate": 3.680522462279346e-10, | |
| "loss": 0.4730003774166107, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.9952960259529604, | |
| "grad_norm": 0.585075318813324, | |
| "learning_rate": 2.555923167291141e-10, | |
| "loss": 0.5166332721710205, | |
| "step": 2309 | |
| }, | |
| { | |
| "epoch": 2.9965936739659367, | |
| "grad_norm": 0.5931539535522461, | |
| "learning_rate": 1.635793335652558e-10, | |
| "loss": 0.5443276166915894, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.997891321978913, | |
| "grad_norm": 0.6000698804855347, | |
| "learning_rate": 9.20134848814147e-11, | |
| "loss": 0.4828116297721863, | |
| "step": 2311 | |
| }, | |
| { | |
| "epoch": 2.9991889699918897, | |
| "grad_norm": 0.5825672149658203, | |
| "learning_rate": 4.08949170105366e-11, | |
| "loss": 0.48934438824653625, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.8691220283508301, | |
| "learning_rate": 1.022373447900904e-11, | |
| "loss": 0.5870037078857422, | |
| "step": 2313 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2313, | |
| "total_flos": 8.852766725217714e+18, | |
| "train_loss": 0.5397342537911073, | |
| "train_runtime": 26894.7398, | |
| "train_samples_per_second": 11.002, | |
| "train_steps_per_second": 0.086 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 2313, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 230, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.852766725217714e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |