Instructions to use alabenayed/TounsiLM-8b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use alabenayed/TounsiLM-8b with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("CohereLabs/aya-expanse-8b")
model = PeftModel.from_pretrained(base_model, "alabenayed/TounsiLM-8b")
```
- Transformers
How to use alabenayed/TounsiLM-8b with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="alabenayed/TounsiLM-8b")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("alabenayed/TounsiLM-8b", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use alabenayed/TounsiLM-8b with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "alabenayed/TounsiLM-8b"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "alabenayed/TounsiLM-8b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/alabenayed/TounsiLM-8b
- SGLang
How to use alabenayed/TounsiLM-8b with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "alabenayed/TounsiLM-8b" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "alabenayed/TounsiLM-8b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "alabenayed/TounsiLM-8b" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "alabenayed/TounsiLM-8b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use alabenayed/TounsiLM-8b with Docker Model Runner:
docker model run hf.co/alabenayed/TounsiLM-8b
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 200, | |
| "global_step": 1584, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 2.4575500309467317, | |
| "epoch": 0.012630249447426587, | |
| "grad_norm": 4.916348934173584, | |
| "learning_rate": 1.8750000000000003e-06, | |
| "loss": 3.6598, | |
| "mean_token_accuracy": 0.4153611570596695, | |
| "num_tokens": 59642.0, | |
| "step": 10 | |
| }, | |
| { | |
| "entropy": 2.4072387635707857, | |
| "epoch": 0.025260498894853173, | |
| "grad_norm": 3.8026137351989746, | |
| "learning_rate": 3.958333333333333e-06, | |
| "loss": 3.3603, | |
| "mean_token_accuracy": 0.4350100517272949, | |
| "num_tokens": 119219.0, | |
| "step": 20 | |
| }, | |
| { | |
| "entropy": 2.3899864494800567, | |
| "epoch": 0.03789074834227976, | |
| "grad_norm": 3.7880399227142334, | |
| "learning_rate": 6.041666666666667e-06, | |
| "loss": 2.9434, | |
| "mean_token_accuracy": 0.4788561977446079, | |
| "num_tokens": 179590.0, | |
| "step": 30 | |
| }, | |
| { | |
| "entropy": 2.1122478008270265, | |
| "epoch": 0.05052099778970635, | |
| "grad_norm": 3.0592074394226074, | |
| "learning_rate": 8.125000000000001e-06, | |
| "loss": 2.3919, | |
| "mean_token_accuracy": 0.574567300081253, | |
| "num_tokens": 238845.0, | |
| "step": 40 | |
| }, | |
| { | |
| "entropy": 1.7037649989128112, | |
| "epoch": 0.06315124723713293, | |
| "grad_norm": 1.5836262702941895, | |
| "learning_rate": 9.993489583333334e-06, | |
| "loss": 1.912, | |
| "mean_token_accuracy": 0.6467478528618813, | |
| "num_tokens": 298317.0, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 1.5623225390911102, | |
| "epoch": 0.07578149668455952, | |
| "grad_norm": 1.217679738998413, | |
| "learning_rate": 9.928385416666668e-06, | |
| "loss": 1.6762, | |
| "mean_token_accuracy": 0.679128734767437, | |
| "num_tokens": 357858.0, | |
| "step": 60 | |
| }, | |
| { | |
| "entropy": 1.5071247130632401, | |
| "epoch": 0.0884117461319861, | |
| "grad_norm": 0.973615288734436, | |
| "learning_rate": 9.863281250000001e-06, | |
| "loss": 1.5372, | |
| "mean_token_accuracy": 0.6943170607089997, | |
| "num_tokens": 418834.0, | |
| "step": 70 | |
| }, | |
| { | |
| "entropy": 1.4568549275398255, | |
| "epoch": 0.1010419955794127, | |
| "grad_norm": 0.9853116869926453, | |
| "learning_rate": 9.798177083333335e-06, | |
| "loss": 1.4751, | |
| "mean_token_accuracy": 0.7024633795022964, | |
| "num_tokens": 478960.0, | |
| "step": 80 | |
| }, | |
| { | |
| "entropy": 1.4889154583215714, | |
| "epoch": 0.11367224502683929, | |
| "grad_norm": 0.9147132039070129, | |
| "learning_rate": 9.733072916666667e-06, | |
| "loss": 1.474, | |
| "mean_token_accuracy": 0.6996816232800483, | |
| "num_tokens": 541795.0, | |
| "step": 90 | |
| }, | |
| { | |
| "entropy": 1.4158774405717849, | |
| "epoch": 0.12630249447426586, | |
| "grad_norm": 0.9684887528419495, | |
| "learning_rate": 9.66796875e-06, | |
| "loss": 1.3805, | |
| "mean_token_accuracy": 0.7165829420089722, | |
| "num_tokens": 601174.0, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 1.4276181221008302, | |
| "epoch": 0.13893274392169244, | |
| "grad_norm": 0.9440239667892456, | |
| "learning_rate": 9.602864583333335e-06, | |
| "loss": 1.3718, | |
| "mean_token_accuracy": 0.7143253713846207, | |
| "num_tokens": 661048.0, | |
| "step": 110 | |
| }, | |
| { | |
| "entropy": 1.4359370201826096, | |
| "epoch": 0.15156299336911905, | |
| "grad_norm": 0.8779081702232361, | |
| "learning_rate": 9.537760416666667e-06, | |
| "loss": 1.3661, | |
| "mean_token_accuracy": 0.7162409156560898, | |
| "num_tokens": 722298.0, | |
| "step": 120 | |
| }, | |
| { | |
| "entropy": 1.3943599790334702, | |
| "epoch": 0.16419324281654563, | |
| "grad_norm": 0.8999291062355042, | |
| "learning_rate": 9.47265625e-06, | |
| "loss": 1.3193, | |
| "mean_token_accuracy": 0.7252198755741119, | |
| "num_tokens": 782683.0, | |
| "step": 130 | |
| }, | |
| { | |
| "entropy": 1.3758090347051621, | |
| "epoch": 0.1768234922639722, | |
| "grad_norm": 0.8218080997467041, | |
| "learning_rate": 9.407552083333334e-06, | |
| "loss": 1.3054, | |
| "mean_token_accuracy": 0.7277572214603424, | |
| "num_tokens": 842988.0, | |
| "step": 140 | |
| }, | |
| { | |
| "entropy": 1.381770172715187, | |
| "epoch": 0.1894537417113988, | |
| "grad_norm": 0.8062577843666077, | |
| "learning_rate": 9.342447916666668e-06, | |
| "loss": 1.3291, | |
| "mean_token_accuracy": 0.7222751513123512, | |
| "num_tokens": 903912.0, | |
| "step": 150 | |
| }, | |
| { | |
| "entropy": 1.352141672372818, | |
| "epoch": 0.2020839911588254, | |
| "grad_norm": 0.8221862316131592, | |
| "learning_rate": 9.277343750000001e-06, | |
| "loss": 1.2974, | |
| "mean_token_accuracy": 0.7260218441486359, | |
| "num_tokens": 964887.0, | |
| "step": 160 | |
| }, | |
| { | |
| "entropy": 1.346352329850197, | |
| "epoch": 0.21471424060625197, | |
| "grad_norm": 0.7375346422195435, | |
| "learning_rate": 9.212239583333335e-06, | |
| "loss": 1.2969, | |
| "mean_token_accuracy": 0.7252495244145394, | |
| "num_tokens": 1026887.0, | |
| "step": 170 | |
| }, | |
| { | |
| "entropy": 1.3165962457656861, | |
| "epoch": 0.22734449005367857, | |
| "grad_norm": 0.7950690388679504, | |
| "learning_rate": 9.147135416666667e-06, | |
| "loss": 1.2824, | |
| "mean_token_accuracy": 0.7250601649284363, | |
| "num_tokens": 1086995.0, | |
| "step": 180 | |
| }, | |
| { | |
| "entropy": 1.3047442227602004, | |
| "epoch": 0.23997473950110515, | |
| "grad_norm": 0.7147737145423889, | |
| "learning_rate": 9.082031250000001e-06, | |
| "loss": 1.2628, | |
| "mean_token_accuracy": 0.7318986386060715, | |
| "num_tokens": 1147209.0, | |
| "step": 190 | |
| }, | |
| { | |
| "entropy": 1.2989415228366852, | |
| "epoch": 0.25260498894853173, | |
| "grad_norm": 0.756094753742218, | |
| "learning_rate": 9.016927083333335e-06, | |
| "loss": 1.2484, | |
| "mean_token_accuracy": 0.7319697335362434, | |
| "num_tokens": 1207602.0, | |
| "step": 200 | |
| }, | |
| { | |
| "entropy": 1.2904020875692368, | |
| "epoch": 0.2652352383959583, | |
| "grad_norm": 0.7715655565261841, | |
| "learning_rate": 8.951822916666667e-06, | |
| "loss": 1.2447, | |
| "mean_token_accuracy": 0.7349080622196198, | |
| "num_tokens": 1267500.0, | |
| "step": 210 | |
| }, | |
| { | |
| "entropy": 1.2543610483407974, | |
| "epoch": 0.2778654878433849, | |
| "grad_norm": 0.6824166774749756, | |
| "learning_rate": 8.88671875e-06, | |
| "loss": 1.2111, | |
| "mean_token_accuracy": 0.7386362582445145, | |
| "num_tokens": 1327666.0, | |
| "step": 220 | |
| }, | |
| { | |
| "entropy": 1.2946221768856048, | |
| "epoch": 0.2904957372908115, | |
| "grad_norm": 0.6559598445892334, | |
| "learning_rate": 8.821614583333334e-06, | |
| "loss": 1.2574, | |
| "mean_token_accuracy": 0.7287471711635589, | |
| "num_tokens": 1389712.0, | |
| "step": 230 | |
| }, | |
| { | |
| "entropy": 1.2489944666624069, | |
| "epoch": 0.3031259867382381, | |
| "grad_norm": 0.7000382542610168, | |
| "learning_rate": 8.756510416666666e-06, | |
| "loss": 1.2092, | |
| "mean_token_accuracy": 0.7372458636760711, | |
| "num_tokens": 1448670.0, | |
| "step": 240 | |
| }, | |
| { | |
| "entropy": 1.2534994542598725, | |
| "epoch": 0.3157562361856647, | |
| "grad_norm": 0.6579836010932922, | |
| "learning_rate": 8.69140625e-06, | |
| "loss": 1.2132, | |
| "mean_token_accuracy": 0.7380462676286698, | |
| "num_tokens": 1508428.0, | |
| "step": 250 | |
| }, | |
| { | |
| "entropy": 1.2474523901939392, | |
| "epoch": 0.32838648563309125, | |
| "grad_norm": 0.6546089053153992, | |
| "learning_rate": 8.626302083333334e-06, | |
| "loss": 1.2103, | |
| "mean_token_accuracy": 0.7395781621336937, | |
| "num_tokens": 1568018.0, | |
| "step": 260 | |
| }, | |
| { | |
| "entropy": 1.2445458561182021, | |
| "epoch": 0.34101673508051783, | |
| "grad_norm": 0.6377413868904114, | |
| "learning_rate": 8.561197916666667e-06, | |
| "loss": 1.2007, | |
| "mean_token_accuracy": 0.7419240340590477, | |
| "num_tokens": 1627904.0, | |
| "step": 270 | |
| }, | |
| { | |
| "entropy": 1.279063493013382, | |
| "epoch": 0.3536469845279444, | |
| "grad_norm": 0.6460844278335571, | |
| "learning_rate": 8.496093750000001e-06, | |
| "loss": 1.2497, | |
| "mean_token_accuracy": 0.729638360440731, | |
| "num_tokens": 1689637.0, | |
| "step": 280 | |
| }, | |
| { | |
| "entropy": 1.2362476408481597, | |
| "epoch": 0.366277233975371, | |
| "grad_norm": 0.6648440361022949, | |
| "learning_rate": 8.430989583333335e-06, | |
| "loss": 1.2091, | |
| "mean_token_accuracy": 0.7385585099458695, | |
| "num_tokens": 1749861.0, | |
| "step": 290 | |
| }, | |
| { | |
| "entropy": 1.2533661901950837, | |
| "epoch": 0.3789074834227976, | |
| "grad_norm": 0.6637682318687439, | |
| "learning_rate": 8.365885416666667e-06, | |
| "loss": 1.2163, | |
| "mean_token_accuracy": 0.7371826618909836, | |
| "num_tokens": 1810407.0, | |
| "step": 300 | |
| }, | |
| { | |
| "entropy": 1.2383619010448457, | |
| "epoch": 0.3915377328702242, | |
| "grad_norm": 0.660043478012085, | |
| "learning_rate": 8.30078125e-06, | |
| "loss": 1.2026, | |
| "mean_token_accuracy": 0.7364327058196067, | |
| "num_tokens": 1871544.0, | |
| "step": 310 | |
| }, | |
| { | |
| "entropy": 1.2316229462623596, | |
| "epoch": 0.4041679823176508, | |
| "grad_norm": 0.6285788416862488, | |
| "learning_rate": 8.235677083333334e-06, | |
| "loss": 1.2064, | |
| "mean_token_accuracy": 0.7371214032173157, | |
| "num_tokens": 1932125.0, | |
| "step": 320 | |
| }, | |
| { | |
| "entropy": 1.2459111303091048, | |
| "epoch": 0.41679823176507735, | |
| "grad_norm": 0.6204569339752197, | |
| "learning_rate": 8.170572916666666e-06, | |
| "loss": 1.1997, | |
| "mean_token_accuracy": 0.7365512102842331, | |
| "num_tokens": 1993924.0, | |
| "step": 330 | |
| }, | |
| { | |
| "entropy": 1.2156363114714623, | |
| "epoch": 0.42942848121250393, | |
| "grad_norm": 0.6501284241676331, | |
| "learning_rate": 8.10546875e-06, | |
| "loss": 1.1863, | |
| "mean_token_accuracy": 0.741255110502243, | |
| "num_tokens": 2054496.0, | |
| "step": 340 | |
| }, | |
| { | |
| "entropy": 1.2222040683031081, | |
| "epoch": 0.4420587306599305, | |
| "grad_norm": 0.602418065071106, | |
| "learning_rate": 8.040364583333334e-06, | |
| "loss": 1.1913, | |
| "mean_token_accuracy": 0.739654652774334, | |
| "num_tokens": 2114825.0, | |
| "step": 350 | |
| }, | |
| { | |
| "entropy": 1.2437947690486908, | |
| "epoch": 0.45468898010735714, | |
| "grad_norm": 0.6289706230163574, | |
| "learning_rate": 7.975260416666668e-06, | |
| "loss": 1.2142, | |
| "mean_token_accuracy": 0.7374308854341507, | |
| "num_tokens": 2176058.0, | |
| "step": 360 | |
| }, | |
| { | |
| "entropy": 1.2139764934778214, | |
| "epoch": 0.4673192295547837, | |
| "grad_norm": 0.6439516544342041, | |
| "learning_rate": 7.910156250000001e-06, | |
| "loss": 1.1769, | |
| "mean_token_accuracy": 0.7426491379737854, | |
| "num_tokens": 2236783.0, | |
| "step": 370 | |
| }, | |
| { | |
| "entropy": 1.19720456302166, | |
| "epoch": 0.4799494790022103, | |
| "grad_norm": 0.6499606966972351, | |
| "learning_rate": 7.845052083333335e-06, | |
| "loss": 1.1829, | |
| "mean_token_accuracy": 0.7399616882205009, | |
| "num_tokens": 2298432.0, | |
| "step": 380 | |
| }, | |
| { | |
| "entropy": 1.205560651421547, | |
| "epoch": 0.4925797284496369, | |
| "grad_norm": 0.6545577645301819, | |
| "learning_rate": 7.779947916666667e-06, | |
| "loss": 1.1577, | |
| "mean_token_accuracy": 0.7463845536112785, | |
| "num_tokens": 2357808.0, | |
| "step": 390 | |
| }, | |
| { | |
| "entropy": 1.19621299803257, | |
| "epoch": 0.5052099778970635, | |
| "grad_norm": 0.6930111050605774, | |
| "learning_rate": 7.71484375e-06, | |
| "loss": 1.1583, | |
| "mean_token_accuracy": 0.7453805327415466, | |
| "num_tokens": 2417574.0, | |
| "step": 400 | |
| }, | |
| { | |
| "entropy": 1.1963690370321274, | |
| "epoch": 0.5178402273444901, | |
| "grad_norm": 0.648593544960022, | |
| "learning_rate": 7.649739583333334e-06, | |
| "loss": 1.1723, | |
| "mean_token_accuracy": 0.7415376961231231, | |
| "num_tokens": 2478088.0, | |
| "step": 410 | |
| }, | |
| { | |
| "entropy": 1.216522666811943, | |
| "epoch": 0.5304704767919166, | |
| "grad_norm": 0.6348926424980164, | |
| "learning_rate": 7.5846354166666665e-06, | |
| "loss": 1.1701, | |
| "mean_token_accuracy": 0.7432737082242966, | |
| "num_tokens": 2538612.0, | |
| "step": 420 | |
| }, | |
| { | |
| "entropy": 1.1990931153297424, | |
| "epoch": 0.5431007262393432, | |
| "grad_norm": 0.627249002456665, | |
| "learning_rate": 7.51953125e-06, | |
| "loss": 1.1688, | |
| "mean_token_accuracy": 0.7435364574193954, | |
| "num_tokens": 2599023.0, | |
| "step": 430 | |
| }, | |
| { | |
| "entropy": 1.1872963696718215, | |
| "epoch": 0.5557309756867698, | |
| "grad_norm": 0.6614134311676025, | |
| "learning_rate": 7.454427083333334e-06, | |
| "loss": 1.1622, | |
| "mean_token_accuracy": 0.7470521196722985, | |
| "num_tokens": 2658338.0, | |
| "step": 440 | |
| }, | |
| { | |
| "entropy": 1.215770760178566, | |
| "epoch": 0.5683612251341964, | |
| "grad_norm": 0.6228342652320862, | |
| "learning_rate": 7.389322916666667e-06, | |
| "loss": 1.1898, | |
| "mean_token_accuracy": 0.7409805700182914, | |
| "num_tokens": 2719316.0, | |
| "step": 450 | |
| }, | |
| { | |
| "entropy": 1.1998004853725432, | |
| "epoch": 0.580991474581623, | |
| "grad_norm": 0.6525698304176331, | |
| "learning_rate": 7.3242187500000006e-06, | |
| "loss": 1.167, | |
| "mean_token_accuracy": 0.7438512742519379, | |
| "num_tokens": 2780272.0, | |
| "step": 460 | |
| }, | |
| { | |
| "entropy": 1.1898580551147462, | |
| "epoch": 0.5936217240290496, | |
| "grad_norm": 0.6669884324073792, | |
| "learning_rate": 7.259114583333334e-06, | |
| "loss": 1.1669, | |
| "mean_token_accuracy": 0.7437147945165634, | |
| "num_tokens": 2840261.0, | |
| "step": 470 | |
| }, | |
| { | |
| "entropy": 1.21882204413414, | |
| "epoch": 0.6062519734764762, | |
| "grad_norm": 0.6129422783851624, | |
| "learning_rate": 7.194010416666667e-06, | |
| "loss": 1.177, | |
| "mean_token_accuracy": 0.7423913896083831, | |
| "num_tokens": 2901347.0, | |
| "step": 480 | |
| }, | |
| { | |
| "entropy": 1.163309469819069, | |
| "epoch": 0.6188822229239027, | |
| "grad_norm": 0.6334741115570068, | |
| "learning_rate": 7.128906250000001e-06, | |
| "loss": 1.1393, | |
| "mean_token_accuracy": 0.7511255607008934, | |
| "num_tokens": 2960518.0, | |
| "step": 490 | |
| }, | |
| { | |
| "entropy": 1.1994746267795562, | |
| "epoch": 0.6315124723713293, | |
| "grad_norm": 0.6261829733848572, | |
| "learning_rate": 7.063802083333335e-06, | |
| "loss": 1.1605, | |
| "mean_token_accuracy": 0.7433080047369003, | |
| "num_tokens": 3021957.0, | |
| "step": 500 | |
| }, | |
| { | |
| "entropy": 1.167793545126915, | |
| "epoch": 0.6441427218187559, | |
| "grad_norm": 0.5909908413887024, | |
| "learning_rate": 6.998697916666667e-06, | |
| "loss": 1.1468, | |
| "mean_token_accuracy": 0.7475745663046837, | |
| "num_tokens": 3083301.0, | |
| "step": 510 | |
| }, | |
| { | |
| "entropy": 1.1670663714408875, | |
| "epoch": 0.6567729712661825, | |
| "grad_norm": 0.6018249988555908, | |
| "learning_rate": 6.93359375e-06, | |
| "loss": 1.1425, | |
| "mean_token_accuracy": 0.7485125616192818, | |
| "num_tokens": 3143187.0, | |
| "step": 520 | |
| }, | |
| { | |
| "entropy": 1.1626142784953117, | |
| "epoch": 0.6694032207136091, | |
| "grad_norm": 0.6088816523551941, | |
| "learning_rate": 6.868489583333334e-06, | |
| "loss": 1.1297, | |
| "mean_token_accuracy": 0.7490727782249451, | |
| "num_tokens": 3202489.0, | |
| "step": 530 | |
| }, | |
| { | |
| "entropy": 1.1758243769407273, | |
| "epoch": 0.6820334701610357, | |
| "grad_norm": 0.6021592020988464, | |
| "learning_rate": 6.803385416666667e-06, | |
| "loss": 1.1656, | |
| "mean_token_accuracy": 0.7443674057722092, | |
| "num_tokens": 3263476.0, | |
| "step": 540 | |
| }, | |
| { | |
| "entropy": 1.179671287536621, | |
| "epoch": 0.6946637196084623, | |
| "grad_norm": 0.5955655574798584, | |
| "learning_rate": 6.738281250000001e-06, | |
| "loss": 1.1385, | |
| "mean_token_accuracy": 0.7481714516878128, | |
| "num_tokens": 3324008.0, | |
| "step": 550 | |
| }, | |
| { | |
| "entropy": 1.1886188358068466, | |
| "epoch": 0.7072939690558888, | |
| "grad_norm": 0.6246835589408875, | |
| "learning_rate": 6.6731770833333345e-06, | |
| "loss": 1.1607, | |
| "mean_token_accuracy": 0.7447509884834289, | |
| "num_tokens": 3383861.0, | |
| "step": 560 | |
| }, | |
| { | |
| "entropy": 1.1690475821495057, | |
| "epoch": 0.7199242185033154, | |
| "grad_norm": 0.606743335723877, | |
| "learning_rate": 6.6080729166666665e-06, | |
| "loss": 1.1298, | |
| "mean_token_accuracy": 0.7493681326508522, | |
| "num_tokens": 3443946.0, | |
| "step": 570 | |
| }, | |
| { | |
| "entropy": 1.1725697651505471, | |
| "epoch": 0.732554467950742, | |
| "grad_norm": 0.6846170425415039, | |
| "learning_rate": 6.54296875e-06, | |
| "loss": 1.1452, | |
| "mean_token_accuracy": 0.7482522815465927, | |
| "num_tokens": 3503787.0, | |
| "step": 580 | |
| }, | |
| { | |
| "entropy": 1.1713406786322593, | |
| "epoch": 0.7451847173981686, | |
| "grad_norm": 0.6522074341773987, | |
| "learning_rate": 6.477864583333334e-06, | |
| "loss": 1.1338, | |
| "mean_token_accuracy": 0.7498400524258614, | |
| "num_tokens": 3563403.0, | |
| "step": 590 | |
| }, | |
| { | |
| "entropy": 1.1848436295986176, | |
| "epoch": 0.7578149668455952, | |
| "grad_norm": 0.6417824625968933, | |
| "learning_rate": 6.412760416666667e-06, | |
| "loss": 1.1499, | |
| "mean_token_accuracy": 0.7452719643712044, | |
| "num_tokens": 3625007.0, | |
| "step": 600 | |
| }, | |
| { | |
| "entropy": 1.1822121858596801, | |
| "epoch": 0.7704452162930218, | |
| "grad_norm": 0.6329619884490967, | |
| "learning_rate": 6.3476562500000006e-06, | |
| "loss": 1.159, | |
| "mean_token_accuracy": 0.7452733591198921, | |
| "num_tokens": 3686099.0, | |
| "step": 610 | |
| }, | |
| { | |
| "entropy": 1.190292978286743, | |
| "epoch": 0.7830754657404484, | |
| "grad_norm": 0.6627410054206848, | |
| "learning_rate": 6.282552083333334e-06, | |
| "loss": 1.1558, | |
| "mean_token_accuracy": 0.7438480347394943, | |
| "num_tokens": 3747233.0, | |
| "step": 620 | |
| }, | |
| { | |
| "entropy": 1.1619529083371163, | |
| "epoch": 0.7957057151878749, | |
| "grad_norm": 0.5941329002380371, | |
| "learning_rate": 6.217447916666667e-06, | |
| "loss": 1.1377, | |
| "mean_token_accuracy": 0.7503219902515411, | |
| "num_tokens": 3807833.0, | |
| "step": 630 | |
| }, | |
| { | |
| "entropy": 1.1658748656511306, | |
| "epoch": 0.8083359646353016, | |
| "grad_norm": 0.6438832879066467, | |
| "learning_rate": 6.152343750000001e-06, | |
| "loss": 1.1397, | |
| "mean_token_accuracy": 0.7471553102135658, | |
| "num_tokens": 3868549.0, | |
| "step": 640 | |
| }, | |
| { | |
| "entropy": 1.1782082825899125, | |
| "epoch": 0.8209662140827282, | |
| "grad_norm": 0.6389635801315308, | |
| "learning_rate": 6.087239583333335e-06, | |
| "loss": 1.1434, | |
| "mean_token_accuracy": 0.7477709770202636, | |
| "num_tokens": 3929057.0, | |
| "step": 650 | |
| }, | |
| { | |
| "entropy": 1.1625961899757384, | |
| "epoch": 0.8335964635301547, | |
| "grad_norm": 0.6134201288223267, | |
| "learning_rate": 6.022135416666667e-06, | |
| "loss": 1.1352, | |
| "mean_token_accuracy": 0.748055274784565, | |
| "num_tokens": 3990676.0, | |
| "step": 660 | |
| }, | |
| { | |
| "entropy": 1.1510928481817246, | |
| "epoch": 0.8462267129775813, | |
| "grad_norm": 0.6336613893508911, | |
| "learning_rate": 5.95703125e-06, | |
| "loss": 1.1182, | |
| "mean_token_accuracy": 0.7524245917797089, | |
| "num_tokens": 4051046.0, | |
| "step": 670 | |
| }, | |
| { | |
| "entropy": 1.1498646020889283, | |
| "epoch": 0.8588569624250079, | |
| "grad_norm": 0.6758144497871399, | |
| "learning_rate": 5.891927083333334e-06, | |
| "loss": 1.1186, | |
| "mean_token_accuracy": 0.7507978692650795, | |
| "num_tokens": 4111084.0, | |
| "step": 680 | |
| }, | |
| { | |
| "entropy": 1.167962297797203, | |
| "epoch": 0.8714872118724345, | |
| "grad_norm": 0.6285990476608276, | |
| "learning_rate": 5.826822916666667e-06, | |
| "loss": 1.1395, | |
| "mean_token_accuracy": 0.7476246923208236, | |
| "num_tokens": 4172628.0, | |
| "step": 690 | |
| }, | |
| { | |
| "entropy": 1.1178194358944893, | |
| "epoch": 0.884117461319861, | |
| "grad_norm": 0.64762282371521, | |
| "learning_rate": 5.761718750000001e-06, | |
| "loss": 1.0919, | |
| "mean_token_accuracy": 0.7569874793291091, | |
| "num_tokens": 4231821.0, | |
| "step": 700 | |
| }, | |
| { | |
| "entropy": 1.1606462925672532, | |
| "epoch": 0.8967477107672877, | |
| "grad_norm": 0.6292758584022522, | |
| "learning_rate": 5.6966145833333344e-06, | |
| "loss": 1.1354, | |
| "mean_token_accuracy": 0.750880953669548, | |
| "num_tokens": 4292646.0, | |
| "step": 710 | |
| }, | |
| { | |
| "entropy": 1.1580617666244506, | |
| "epoch": 0.9093779602147143, | |
| "grad_norm": 0.6393706798553467, | |
| "learning_rate": 5.6315104166666665e-06, | |
| "loss": 1.1205, | |
| "mean_token_accuracy": 0.7499566927552224, | |
| "num_tokens": 4353199.0, | |
| "step": 720 | |
| }, | |
| { | |
| "entropy": 1.1515695974230766, | |
| "epoch": 0.9220082096621408, | |
| "grad_norm": 0.687380313873291, | |
| "learning_rate": 5.56640625e-06, | |
| "loss": 1.1138, | |
| "mean_token_accuracy": 0.7514134287834168, | |
| "num_tokens": 4414122.0, | |
| "step": 730 | |
| }, | |
| { | |
| "entropy": 1.1574165880680085, | |
| "epoch": 0.9346384591095674, | |
| "grad_norm": 0.6102684736251831, | |
| "learning_rate": 5.501302083333334e-06, | |
| "loss": 1.1302, | |
| "mean_token_accuracy": 0.7507740229368209, | |
| "num_tokens": 4474548.0, | |
| "step": 740 | |
| }, | |
| { | |
| "entropy": 1.1491190433502196, | |
| "epoch": 0.947268708556994, | |
| "grad_norm": 0.623504638671875, | |
| "learning_rate": 5.436197916666667e-06, | |
| "loss": 1.129, | |
| "mean_token_accuracy": 0.7512574091553688, | |
| "num_tokens": 4534678.0, | |
| "step": 750 | |
| }, | |
| { | |
| "entropy": 1.1538215219974517, | |
| "epoch": 0.9598989580044206, | |
| "grad_norm": 0.6368807554244995, | |
| "learning_rate": 5.3710937500000005e-06, | |
| "loss": 1.1181, | |
| "mean_token_accuracy": 0.7520082175731659, | |
| "num_tokens": 4594878.0, | |
| "step": 760 | |
| }, | |
| { | |
| "entropy": 1.1623035803437234, | |
| "epoch": 0.9725292074518471, | |
| "grad_norm": 0.6332852840423584, | |
| "learning_rate": 5.305989583333334e-06, | |
| "loss": 1.1308, | |
| "mean_token_accuracy": 0.7497873172163964, | |
| "num_tokens": 4656513.0, | |
| "step": 770 | |
| }, | |
| { | |
| "entropy": 1.1483627527952194, | |
| "epoch": 0.9851594568992738, | |
| "grad_norm": 0.6341389417648315, | |
| "learning_rate": 5.240885416666667e-06, | |
| "loss": 1.1142, | |
| "mean_token_accuracy": 0.7533516198396683, | |
| "num_tokens": 4717111.0, | |
| "step": 780 | |
| }, | |
| { | |
| "entropy": 1.1455359414219857, | |
| "epoch": 0.9977897063467004, | |
| "grad_norm": 0.6641396880149841, | |
| "learning_rate": 5.17578125e-06, | |
| "loss": 1.1117, | |
| "mean_token_accuracy": 0.7530950620770455, | |
| "num_tokens": 4777713.0, | |
| "step": 790 | |
| }, | |
| { | |
| "entropy": 1.148778918461922, | |
| "epoch": 1.0101041995579412, | |
| "grad_norm": 0.6454346776008606, | |
| "learning_rate": 5.110677083333334e-06, | |
| "loss": 1.1146, | |
| "mean_token_accuracy": 0.7511914097345792, | |
| "num_tokens": 4837103.0, | |
| "step": 800 | |
| }, | |
| { | |
| "entropy": 1.1441998034715652, | |
| "epoch": 1.0227344490053678, | |
| "grad_norm": 0.6368332505226135, | |
| "learning_rate": 5.045572916666667e-06, | |
| "loss": 1.1003, | |
| "mean_token_accuracy": 0.7535203993320465, | |
| "num_tokens": 4898715.0, | |
| "step": 810 | |
| }, | |
| { | |
| "entropy": 1.1195117503404617, | |
| "epoch": 1.0353646984527944, | |
| "grad_norm": 0.6546683311462402, | |
| "learning_rate": 4.98046875e-06, | |
| "loss": 1.0924, | |
| "mean_token_accuracy": 0.7574156150221825, | |
| "num_tokens": 4959681.0, | |
| "step": 820 | |
| }, | |
| { | |
| "entropy": 1.1403603315353394, | |
| "epoch": 1.047994947900221, | |
| "grad_norm": 0.6645976305007935, | |
| "learning_rate": 4.915364583333333e-06, | |
| "loss": 1.1031, | |
| "mean_token_accuracy": 0.7548869714140892, | |
| "num_tokens": 5020382.0, | |
| "step": 830 | |
| }, | |
| { | |
| "entropy": 1.1299657106399537, | |
| "epoch": 1.0606251973476477, | |
| "grad_norm": 0.6225126385688782, | |
| "learning_rate": 4.850260416666667e-06, | |
| "loss": 1.0915, | |
| "mean_token_accuracy": 0.7562400087714195, | |
| "num_tokens": 5080360.0, | |
| "step": 840 | |
| }, | |
| { | |
| "entropy": 1.12370226085186, | |
| "epoch": 1.0732554467950741, | |
| "grad_norm": 0.6478942036628723, | |
| "learning_rate": 4.785156250000001e-06, | |
| "loss": 1.1064, | |
| "mean_token_accuracy": 0.7542634457349777, | |
| "num_tokens": 5140349.0, | |
| "step": 850 | |
| }, | |
| { | |
| "entropy": 1.1469928681850434, | |
| "epoch": 1.0858856962425008, | |
| "grad_norm": 0.615678608417511, | |
| "learning_rate": 4.7200520833333336e-06, | |
| "loss": 1.1043, | |
| "mean_token_accuracy": 0.7529336720705032, | |
| "num_tokens": 5201690.0, | |
| "step": 860 | |
| }, | |
| { | |
| "entropy": 1.137891921401024, | |
| "epoch": 1.0985159456899274, | |
| "grad_norm": 0.6458525061607361, | |
| "learning_rate": 4.654947916666667e-06, | |
| "loss": 1.1081, | |
| "mean_token_accuracy": 0.7543051362037658, | |
| "num_tokens": 5261698.0, | |
| "step": 870 | |
| }, | |
| { | |
| "entropy": 1.1202880129218102, | |
| "epoch": 1.111146195137354, | |
| "grad_norm": 0.6362131237983704, | |
| "learning_rate": 4.58984375e-06, | |
| "loss": 1.0951, | |
| "mean_token_accuracy": 0.7552427321672439, | |
| "num_tokens": 5321775.0, | |
| "step": 880 | |
| }, | |
| { | |
| "entropy": 1.1365787714719773, | |
| "epoch": 1.1237764445847804, | |
| "grad_norm": 0.6511764526367188, | |
| "learning_rate": 4.524739583333334e-06, | |
| "loss": 1.0961, | |
| "mean_token_accuracy": 0.7562274217605591, | |
| "num_tokens": 5383140.0, | |
| "step": 890 | |
| }, | |
| { | |
| "entropy": 1.1074503496289254, | |
| "epoch": 1.136406694032207, | |
| "grad_norm": 0.6207822561264038, | |
| "learning_rate": 4.459635416666668e-06, | |
| "loss": 1.0848, | |
| "mean_token_accuracy": 0.7591574639081955, | |
| "num_tokens": 5443006.0, | |
| "step": 900 | |
| }, | |
| { | |
| "entropy": 1.1545074522495269, | |
| "epoch": 1.1490369434796337, | |
| "grad_norm": 0.6404831409454346, | |
| "learning_rate": 4.3945312500000005e-06, | |
| "loss": 1.1121, | |
| "mean_token_accuracy": 0.7507721096277237, | |
| "num_tokens": 5503942.0, | |
| "step": 910 | |
| }, | |
| { | |
| "entropy": 1.1401477769017219, | |
| "epoch": 1.1616671929270603, | |
| "grad_norm": 0.6468749046325684, | |
| "learning_rate": 4.329427083333333e-06, | |
| "loss": 1.1011, | |
| "mean_token_accuracy": 0.753543746471405, | |
| "num_tokens": 5564518.0, | |
| "step": 920 | |
| }, | |
| { | |
| "entropy": 1.0945423126220704, | |
| "epoch": 1.174297442374487, | |
| "grad_norm": 0.6418051719665527, | |
| "learning_rate": 4.264322916666667e-06, | |
| "loss": 1.0614, | |
| "mean_token_accuracy": 0.7643799662590027, | |
| "num_tokens": 5624109.0, | |
| "step": 930 | |
| }, | |
| { | |
| "entropy": 1.1136713281273842, | |
| "epoch": 1.1869276918219134, | |
| "grad_norm": 0.6422064304351807, | |
| "learning_rate": 4.19921875e-06, | |
| "loss": 1.0974, | |
| "mean_token_accuracy": 0.7561314895749092, | |
| "num_tokens": 5684801.0, | |
| "step": 940 | |
| }, | |
| { | |
| "entropy": 1.1215770334005355, | |
| "epoch": 1.19955794126934, | |
| "grad_norm": 0.6453995108604431, | |
| "learning_rate": 4.134114583333334e-06, | |
| "loss": 1.0801, | |
| "mean_token_accuracy": 0.7590720430016518, | |
| "num_tokens": 5745499.0, | |
| "step": 950 | |
| }, | |
| { | |
| "entropy": 1.1010483756661416, | |
| "epoch": 1.2121881907167666, | |
| "grad_norm": 0.61696857213974, | |
| "learning_rate": 4.0690104166666675e-06, | |
| "loss": 1.049, | |
| "mean_token_accuracy": 0.7627070844173431, | |
| "num_tokens": 5806117.0, | |
| "step": 960 | |
| }, | |
| { | |
| "entropy": 1.1082940384745599, | |
| "epoch": 1.2248184401641933, | |
| "grad_norm": 0.6523500680923462, | |
| "learning_rate": 4.00390625e-06, | |
| "loss": 1.0807, | |
| "mean_token_accuracy": 0.7579552844166756, | |
| "num_tokens": 5865537.0, | |
| "step": 970 | |
| }, | |
| { | |
| "entropy": 1.102595229446888, | |
| "epoch": 1.23744868961162, | |
| "grad_norm": 0.6376118063926697, | |
| "learning_rate": 3.938802083333333e-06, | |
| "loss": 1.0679, | |
| "mean_token_accuracy": 0.7592279806733131, | |
| "num_tokens": 5925254.0, | |
| "step": 980 | |
| }, | |
| { | |
| "entropy": 1.1277900233864784, | |
| "epoch": 1.2500789390590463, | |
| "grad_norm": 0.6571747660636902, | |
| "learning_rate": 3.873697916666667e-06, | |
| "loss": 1.0888, | |
| "mean_token_accuracy": 0.7549166217446327, | |
| "num_tokens": 5986084.0, | |
| "step": 990 | |
| }, | |
| { | |
| "entropy": 1.113915103673935, | |
| "epoch": 1.262709188506473, | |
| "grad_norm": 0.6531611084938049, | |
| "learning_rate": 3.8085937500000002e-06, | |
| "loss": 1.0718, | |
| "mean_token_accuracy": 0.7577856734395028, | |
| "num_tokens": 6046857.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "entropy": 1.0966202467679977, | |
| "epoch": 1.2753394379538996, | |
| "grad_norm": 0.636698842048645, | |
| "learning_rate": 3.7434895833333336e-06, | |
| "loss": 1.0699, | |
| "mean_token_accuracy": 0.7601938605308532, | |
| "num_tokens": 6106886.0, | |
| "step": 1010 | |
| }, | |
| { | |
| "entropy": 1.1121985822916032, | |
| "epoch": 1.2879696874013262, | |
| "grad_norm": 0.6492161750793457, | |
| "learning_rate": 3.6783854166666673e-06, | |
| "loss": 1.0851, | |
| "mean_token_accuracy": 0.7588792949914932, | |
| "num_tokens": 6167935.0, | |
| "step": 1020 | |
| }, | |
| { | |
| "entropy": 1.1355163961648942, | |
| "epoch": 1.3005999368487529, | |
| "grad_norm": 0.6697131395339966, | |
| "learning_rate": 3.61328125e-06, | |
| "loss": 1.094, | |
| "mean_token_accuracy": 0.754327917098999, | |
| "num_tokens": 6228870.0, | |
| "step": 1030 | |
| }, | |
| { | |
| "entropy": 1.11816665828228, | |
| "epoch": 1.3132301862961793, | |
| "grad_norm": 0.6773020625114441, | |
| "learning_rate": 3.5481770833333335e-06, | |
| "loss": 1.0893, | |
| "mean_token_accuracy": 0.7571294933557511, | |
| "num_tokens": 6288847.0, | |
| "step": 1040 | |
| }, | |
| { | |
| "entropy": 1.1343947052955627, | |
| "epoch": 1.325860435743606, | |
| "grad_norm": 0.6566488146781921, | |
| "learning_rate": 3.483072916666667e-06, | |
| "loss": 1.0875, | |
| "mean_token_accuracy": 0.755756102502346, | |
| "num_tokens": 6350161.0, | |
| "step": 1050 | |
| }, | |
| { | |
| "entropy": 1.1109364911913873, | |
| "epoch": 1.3384906851910325, | |
| "grad_norm": 0.6575057506561279, | |
| "learning_rate": 3.41796875e-06, | |
| "loss": 1.0782, | |
| "mean_token_accuracy": 0.7591001376509666, | |
| "num_tokens": 6410972.0, | |
| "step": 1060 | |
| }, | |
| { | |
| "entropy": 1.1165167808532714, | |
| "epoch": 1.3511209346384592, | |
| "grad_norm": 0.6655089259147644, | |
| "learning_rate": 3.3528645833333334e-06, | |
| "loss": 1.0901, | |
| "mean_token_accuracy": 0.7573199763894081, | |
| "num_tokens": 6471984.0, | |
| "step": 1070 | |
| }, | |
| { | |
| "entropy": 1.1066906094551086, | |
| "epoch": 1.3637511840858858, | |
| "grad_norm": 0.6363748908042908, | |
| "learning_rate": 3.287760416666667e-06, | |
| "loss": 1.0716, | |
| "mean_token_accuracy": 0.7598252177238465, | |
| "num_tokens": 6532514.0, | |
| "step": 1080 | |
| }, | |
| { | |
| "entropy": 1.1047193810343743, | |
| "epoch": 1.3763814335333122, | |
| "grad_norm": 0.6684281826019287, | |
| "learning_rate": 3.2226562500000004e-06, | |
| "loss": 1.0823, | |
| "mean_token_accuracy": 0.7593759268522262, | |
| "num_tokens": 6592949.0, | |
| "step": 1090 | |
| }, | |
| { | |
| "entropy": 1.1348285049200058, | |
| "epoch": 1.3890116829807388, | |
| "grad_norm": 0.6439023017883301, | |
| "learning_rate": 3.1575520833333333e-06, | |
| "loss": 1.1031, | |
| "mean_token_accuracy": 0.7526842474937439, | |
| "num_tokens": 6654231.0, | |
| "step": 1100 | |
| }, | |
| { | |
| "entropy": 1.1191302105784415, | |
| "epoch": 1.4016419324281655, | |
| "grad_norm": 0.6556984186172485, | |
| "learning_rate": 3.092447916666667e-06, | |
| "loss": 1.0799, | |
| "mean_token_accuracy": 0.7590983435511589, | |
| "num_tokens": 6714430.0, | |
| "step": 1110 | |
| }, | |
| { | |
| "entropy": 1.093433029949665, | |
| "epoch": 1.4142721818755921, | |
| "grad_norm": 0.6618829965591431, | |
| "learning_rate": 3.0273437500000003e-06, | |
| "loss": 1.0614, | |
| "mean_token_accuracy": 0.7611085593700408, | |
| "num_tokens": 6774176.0, | |
| "step": 1120 | |
| }, | |
| { | |
| "entropy": 1.135184645652771, | |
| "epoch": 1.4269024313230187, | |
| "grad_norm": 0.6382298469543457, | |
| "learning_rate": 2.962239583333333e-06, | |
| "loss": 1.0939, | |
| "mean_token_accuracy": 0.7532851651310921, | |
| "num_tokens": 6836522.0, | |
| "step": 1130 | |
| }, | |
| { | |
| "entropy": 1.1093149304389953, | |
| "epoch": 1.4395326807704452, | |
| "grad_norm": 0.6382166147232056, | |
| "learning_rate": 2.897135416666667e-06, | |
| "loss": 1.0709, | |
| "mean_token_accuracy": 0.7608326107263566, | |
| "num_tokens": 6896353.0, | |
| "step": 1140 | |
| }, | |
| { | |
| "entropy": 1.1047044202685357, | |
| "epoch": 1.4521629302178718, | |
| "grad_norm": 0.6356373429298401, | |
| "learning_rate": 2.8320312500000002e-06, | |
| "loss": 1.0738, | |
| "mean_token_accuracy": 0.7615469440817833, | |
| "num_tokens": 6956828.0, | |
| "step": 1150 | |
| }, | |
| { | |
| "entropy": 1.1073317646980285, | |
| "epoch": 1.4647931796652984, | |
| "grad_norm": 0.6593008041381836, | |
| "learning_rate": 2.7669270833333335e-06, | |
| "loss": 1.0589, | |
| "mean_token_accuracy": 0.7599197804927826, | |
| "num_tokens": 7017026.0, | |
| "step": 1160 | |
| }, | |
| { | |
| "entropy": 1.0851576775312424, | |
| "epoch": 1.4774234291127248, | |
| "grad_norm": 0.6466282606124878, | |
| "learning_rate": 2.7018229166666673e-06, | |
| "loss": 1.0584, | |
| "mean_token_accuracy": 0.7626572713255882, | |
| "num_tokens": 7076806.0, | |
| "step": 1170 | |
| }, | |
| { | |
| "entropy": 1.1103300124406814, | |
| "epoch": 1.4900536785601517, | |
| "grad_norm": 0.6285493969917297, | |
| "learning_rate": 2.63671875e-06, | |
| "loss": 1.0753, | |
| "mean_token_accuracy": 0.7593718692660332, | |
| "num_tokens": 7137946.0, | |
| "step": 1180 | |
| }, | |
| { | |
| "entropy": 1.1066975593566895, | |
| "epoch": 1.502683928007578, | |
| "grad_norm": 0.6664257645606995, | |
| "learning_rate": 2.5716145833333334e-06, | |
| "loss": 1.0642, | |
| "mean_token_accuracy": 0.7612839996814728, | |
| "num_tokens": 7200103.0, | |
| "step": 1190 | |
| }, | |
| { | |
| "entropy": 1.0994308680295943, | |
| "epoch": 1.5153141774550047, | |
| "grad_norm": 0.683022141456604, | |
| "learning_rate": 2.506510416666667e-06, | |
| "loss": 1.0726, | |
| "mean_token_accuracy": 0.7611020535230637, | |
| "num_tokens": 7259051.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "entropy": 1.1130555748939515, | |
| "epoch": 1.5279444269024314, | |
| "grad_norm": 0.6556797623634338, | |
| "learning_rate": 2.44140625e-06, | |
| "loss": 1.0758, | |
| "mean_token_accuracy": 0.7601210430264473, | |
| "num_tokens": 7318904.0, | |
| "step": 1210 | |
| }, | |
| { | |
| "entropy": 1.112100276350975, | |
| "epoch": 1.5405746763498578, | |
| "grad_norm": 0.6336252689361572, | |
| "learning_rate": 2.3763020833333338e-06, | |
| "loss": 1.0691, | |
| "mean_token_accuracy": 0.7613141894340515, | |
| "num_tokens": 7378611.0, | |
| "step": 1220 | |
| }, | |
| { | |
| "entropy": 1.1018309980630874, | |
| "epoch": 1.5532049257972846, | |
| "grad_norm": 0.6907696723937988, | |
| "learning_rate": 2.3111979166666667e-06, | |
| "loss": 1.074, | |
| "mean_token_accuracy": 0.7605119064450264, | |
| "num_tokens": 7438960.0, | |
| "step": 1230 | |
| }, | |
| { | |
| "entropy": 1.071268692612648, | |
| "epoch": 1.565835175244711, | |
| "grad_norm": 0.6463876962661743, | |
| "learning_rate": 2.2460937500000004e-06, | |
| "loss": 1.0413, | |
| "mean_token_accuracy": 0.7670892596244812, | |
| "num_tokens": 7497275.0, | |
| "step": 1240 | |
| }, | |
| { | |
| "entropy": 1.134592017531395, | |
| "epoch": 1.5784654246921377, | |
| "grad_norm": 0.6366226077079773, | |
| "learning_rate": 2.1809895833333337e-06, | |
| "loss": 1.092, | |
| "mean_token_accuracy": 0.7548690542578698, | |
| "num_tokens": 7558285.0, | |
| "step": 1250 | |
| }, | |
| { | |
| "entropy": 1.1141762882471085, | |
| "epoch": 1.5910956741395643, | |
| "grad_norm": 0.6590870022773743, | |
| "learning_rate": 2.1158854166666666e-06, | |
| "loss": 1.0752, | |
| "mean_token_accuracy": 0.7603784337639808, | |
| "num_tokens": 7620287.0, | |
| "step": 1260 | |
| }, | |
| { | |
| "entropy": 1.09154414832592, | |
| "epoch": 1.6037259235869907, | |
| "grad_norm": 0.656830370426178, | |
| "learning_rate": 2.0507812500000003e-06, | |
| "loss": 1.0593, | |
| "mean_token_accuracy": 0.76341772377491, | |
| "num_tokens": 7681170.0, | |
| "step": 1270 | |
| }, | |
| { | |
| "entropy": 1.10728869587183, | |
| "epoch": 1.6163561730344176, | |
| "grad_norm": 0.6511245965957642, | |
| "learning_rate": 1.9856770833333336e-06, | |
| "loss": 1.0724, | |
| "mean_token_accuracy": 0.7592613711953163, | |
| "num_tokens": 7741125.0, | |
| "step": 1280 | |
| }, | |
| { | |
| "entropy": 1.1026839420199395, | |
| "epoch": 1.628986422481844, | |
| "grad_norm": 0.648682713508606, | |
| "learning_rate": 1.920572916666667e-06, | |
| "loss": 1.0678, | |
| "mean_token_accuracy": 0.7615165829658508, | |
| "num_tokens": 7801002.0, | |
| "step": 1290 | |
| }, | |
| { | |
| "entropy": 1.113681361079216, | |
| "epoch": 1.6416166719292706, | |
| "grad_norm": 0.6691455245018005, | |
| "learning_rate": 1.8554687500000002e-06, | |
| "loss": 1.0662, | |
| "mean_token_accuracy": 0.76031324416399, | |
| "num_tokens": 7861077.0, | |
| "step": 1300 | |
| }, | |
| { | |
| "entropy": 1.0854344859719276, | |
| "epoch": 1.6542469213766973, | |
| "grad_norm": 0.6221432685852051, | |
| "learning_rate": 1.7903645833333335e-06, | |
| "loss": 1.0564, | |
| "mean_token_accuracy": 0.7638715595006943, | |
| "num_tokens": 7920955.0, | |
| "step": 1310 | |
| }, | |
| { | |
| "entropy": 1.1128123462200166, | |
| "epoch": 1.6668771708241237, | |
| "grad_norm": 0.6376025080680847, | |
| "learning_rate": 1.7252604166666668e-06, | |
| "loss": 1.0803, | |
| "mean_token_accuracy": 0.7579856783151626, | |
| "num_tokens": 7981933.0, | |
| "step": 1320 | |
| }, | |
| { | |
| "entropy": 1.105194841325283, | |
| "epoch": 1.6795074202715503, | |
| "grad_norm": 0.712565541267395, | |
| "learning_rate": 1.6601562500000001e-06, | |
| "loss": 1.0758, | |
| "mean_token_accuracy": 0.7577270165085792, | |
| "num_tokens": 8042084.0, | |
| "step": 1330 | |
| }, | |
| { | |
| "entropy": 1.1065697744488716, | |
| "epoch": 1.692137669718977, | |
| "grad_norm": 0.6228471994400024, | |
| "learning_rate": 1.5950520833333336e-06, | |
| "loss": 1.0731, | |
| "mean_token_accuracy": 0.7590463057160377, | |
| "num_tokens": 8102976.0, | |
| "step": 1340 | |
| }, | |
| { | |
| "entropy": 1.1064435616135597, | |
| "epoch": 1.7047679191664036, | |
| "grad_norm": 0.6447433829307556, | |
| "learning_rate": 1.5299479166666667e-06, | |
| "loss": 1.0708, | |
| "mean_token_accuracy": 0.7608707517385482, | |
| "num_tokens": 8163206.0, | |
| "step": 1350 | |
| }, | |
| { | |
| "entropy": 1.1046179130673408, | |
| "epoch": 1.7173981686138302, | |
| "grad_norm": 0.6594550609588623, | |
| "learning_rate": 1.46484375e-06, | |
| "loss": 1.0606, | |
| "mean_token_accuracy": 0.7627649754285812, | |
| "num_tokens": 8224905.0, | |
| "step": 1360 | |
| }, | |
| { | |
| "entropy": 1.1129515051841736, | |
| "epoch": 1.7300284180612566, | |
| "grad_norm": 0.6550594568252563, | |
| "learning_rate": 1.3997395833333335e-06, | |
| "loss": 1.0741, | |
| "mean_token_accuracy": 0.7577028945088387, | |
| "num_tokens": 8286587.0, | |
| "step": 1370 | |
| }, | |
| { | |
| "entropy": 1.094475807249546, | |
| "epoch": 1.7426586675086833, | |
| "grad_norm": 0.6420894265174866, | |
| "learning_rate": 1.3346354166666666e-06, | |
| "loss": 1.0597, | |
| "mean_token_accuracy": 0.7612502560019493, | |
| "num_tokens": 8346283.0, | |
| "step": 1380 | |
| }, | |
| { | |
| "entropy": 1.1026990562677383, | |
| "epoch": 1.75528891695611, | |
| "grad_norm": 0.6647622585296631, | |
| "learning_rate": 1.2695312500000002e-06, | |
| "loss": 1.0705, | |
| "mean_token_accuracy": 0.7592111378908157, | |
| "num_tokens": 8406590.0, | |
| "step": 1390 | |
| }, | |
| { | |
| "entropy": 1.1024970307946205, | |
| "epoch": 1.7679191664035365, | |
| "grad_norm": 0.6625591516494751, | |
| "learning_rate": 1.2044270833333335e-06, | |
| "loss": 1.0783, | |
| "mean_token_accuracy": 0.756389918923378, | |
| "num_tokens": 8467230.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "entropy": 1.1201951175928115, | |
| "epoch": 1.7805494158509632, | |
| "grad_norm": 0.6827495098114014, | |
| "learning_rate": 1.1393229166666668e-06, | |
| "loss": 1.0869, | |
| "mean_token_accuracy": 0.756199948489666, | |
| "num_tokens": 8527820.0, | |
| "step": 1410 | |
| }, | |
| { | |
| "entropy": 1.1082668006420135, | |
| "epoch": 1.7931796652983896, | |
| "grad_norm": 0.6496292948722839, | |
| "learning_rate": 1.07421875e-06, | |
| "loss": 1.0638, | |
| "mean_token_accuracy": 0.7621515318751335, | |
| "num_tokens": 8587544.0, | |
| "step": 1420 | |
| }, | |
| { | |
| "entropy": 1.0907854005694388, | |
| "epoch": 1.8058099147458162, | |
| "grad_norm": 0.6577737927436829, | |
| "learning_rate": 1.0091145833333334e-06, | |
| "loss": 1.0587, | |
| "mean_token_accuracy": 0.7625794589519501, | |
| "num_tokens": 8647424.0, | |
| "step": 1430 | |
| }, | |
| { | |
| "entropy": 1.0947823762893676, | |
| "epoch": 1.8184401641932428, | |
| "grad_norm": 0.6546240448951721, | |
| "learning_rate": 9.440104166666668e-07, | |
| "loss": 1.0648, | |
| "mean_token_accuracy": 0.7618604898452759, | |
| "num_tokens": 8706635.0, | |
| "step": 1440 | |
| }, | |
| { | |
| "entropy": 1.1016521960496903, | |
| "epoch": 1.8310704136406692, | |
| "grad_norm": 0.6349791884422302, | |
| "learning_rate": 8.789062500000001e-07, | |
| "loss": 1.0615, | |
| "mean_token_accuracy": 0.7619734451174736, | |
| "num_tokens": 8766624.0, | |
| "step": 1450 | |
| }, | |
| { | |
| "entropy": 1.110970026254654, | |
| "epoch": 1.843700663088096, | |
| "grad_norm": 0.6273230314254761, | |
| "learning_rate": 8.138020833333334e-07, | |
| "loss": 1.079, | |
| "mean_token_accuracy": 0.7600797146558762, | |
| "num_tokens": 8826556.0, | |
| "step": 1460 | |
| }, | |
| { | |
| "entropy": 1.1075817868113518, | |
| "epoch": 1.8563309125355225, | |
| "grad_norm": 0.6720101833343506, | |
| "learning_rate": 7.486979166666668e-07, | |
| "loss": 1.0695, | |
| "mean_token_accuracy": 0.759764339029789, | |
| "num_tokens": 8887460.0, | |
| "step": 1470 | |
| }, | |
| { | |
| "entropy": 1.0957570180296898, | |
| "epoch": 1.8689611619829491, | |
| "grad_norm": 0.6578065752983093, | |
| "learning_rate": 6.835937500000001e-07, | |
| "loss": 1.0658, | |
| "mean_token_accuracy": 0.7630386680364609, | |
| "num_tokens": 8947077.0, | |
| "step": 1480 | |
| }, | |
| { | |
| "entropy": 1.1054737836122512, | |
| "epoch": 1.8815914114303758, | |
| "grad_norm": 0.6177386045455933, | |
| "learning_rate": 6.184895833333334e-07, | |
| "loss": 1.062, | |
| "mean_token_accuracy": 0.7603132933378219, | |
| "num_tokens": 9008717.0, | |
| "step": 1490 | |
| }, | |
| { | |
| "entropy": 1.1026621460914612, | |
| "epoch": 1.8942216608778022, | |
| "grad_norm": 0.6226282119750977, | |
| "learning_rate": 5.533854166666667e-07, | |
| "loss": 1.0706, | |
| "mean_token_accuracy": 0.7603669881820678, | |
| "num_tokens": 9068623.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "entropy": 1.1010279595851897, | |
| "epoch": 1.906851910325229, | |
| "grad_norm": 0.6504780650138855, | |
| "learning_rate": 4.8828125e-07, | |
| "loss": 1.0583, | |
| "mean_token_accuracy": 0.7632956698536872, | |
| "num_tokens": 9129086.0, | |
| "step": 1510 | |
| }, | |
| { | |
| "entropy": 1.1021641314029693, | |
| "epoch": 1.9194821597726555, | |
| "grad_norm": 0.667875349521637, | |
| "learning_rate": 4.2317708333333337e-07, | |
| "loss": 1.0698, | |
| "mean_token_accuracy": 0.7609776973724365, | |
| "num_tokens": 9189845.0, | |
| "step": 1520 | |
| }, | |
| { | |
| "entropy": 1.0909265503287315, | |
| "epoch": 1.932112409220082, | |
| "grad_norm": 0.650221586227417, | |
| "learning_rate": 3.5807291666666667e-07, | |
| "loss": 1.0541, | |
| "mean_token_accuracy": 0.7616324663162232, | |
| "num_tokens": 9250808.0, | |
| "step": 1530 | |
| }, | |
| { | |
| "entropy": 1.1204875528812408, | |
| "epoch": 1.9447426586675087, | |
| "grad_norm": 0.6560048460960388, | |
| "learning_rate": 2.9296875000000003e-07, | |
| "loss": 1.0821, | |
| "mean_token_accuracy": 0.7588548183441162, | |
| "num_tokens": 9312275.0, | |
| "step": 1540 | |
| }, | |
| { | |
| "entropy": 1.0873224779963493, | |
| "epoch": 1.9573729081149351, | |
| "grad_norm": 0.6481816172599792, | |
| "learning_rate": 2.2786458333333333e-07, | |
| "loss": 1.058, | |
| "mean_token_accuracy": 0.762654073536396, | |
| "num_tokens": 9372216.0, | |
| "step": 1550 | |
| }, | |
| { | |
| "entropy": 1.1072645708918571, | |
| "epoch": 1.970003157562362, | |
| "grad_norm": 0.6645349264144897, | |
| "learning_rate": 1.627604166666667e-07, | |
| "loss": 1.07, | |
| "mean_token_accuracy": 0.7614389002323151, | |
| "num_tokens": 9431986.0, | |
| "step": 1560 | |
| }, | |
| { | |
| "entropy": 1.1167670994997025, | |
| "epoch": 1.9826334070097884, | |
| "grad_norm": 0.6455146670341492, | |
| "learning_rate": 9.765625e-08, | |
| "loss": 1.0754, | |
| "mean_token_accuracy": 0.7594234853982925, | |
| "num_tokens": 9492961.0, | |
| "step": 1570 | |
| }, | |
| { | |
| "entropy": 1.104009985923767, | |
| "epoch": 1.995263656457215, | |
| "grad_norm": 0.6205505132675171, | |
| "learning_rate": 3.2552083333333335e-08, | |
| "loss": 1.0718, | |
| "mean_token_accuracy": 0.7597839057445526, | |
| "num_tokens": 9553806.0, | |
| "step": 1580 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1584, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.4237850982977536e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |