Instructions to use Nike-Hanmatheekuna/nllb-200-3.3B-full with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Nike-Hanmatheekuna/nllb-200-3.3B-full with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM

tokenizer = AutoTokenizer.from_pretrained("Nike-Hanmatheekuna/nllb-200-3.3B-full")
model = AutoModelForMultimodalLM.from_pretrained("Nike-Hanmatheekuna/nllb-200-3.3B-full")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9985620955893846, | |
| "eval_steps": 500, | |
| "global_step": 2997, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0500140664561908, | |
| "grad_norm": 1.5154740810394287, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 1.6338, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1000281329123816, | |
| "grad_norm": 0.7568167448043823, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.1959, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1500421993685724, | |
| "grad_norm": 0.6318637728691101, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8316, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2000562658247632, | |
| "grad_norm": 0.49124225974082947, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.7008, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.25007033228095404, | |
| "grad_norm": 0.49193131923675537, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.6234, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3000843987371448, | |
| "grad_norm": 0.5137419700622559, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5887, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.35009846519333565, | |
| "grad_norm": 0.4855007231235504, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.5523, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4001125316495264, | |
| "grad_norm": 0.4011429250240326, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.5204, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.45012659810571726, | |
| "grad_norm": 0.4894815683364868, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.4922, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5001406645619081, | |
| "grad_norm": 0.4187089204788208, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.4676, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5501547310180989, | |
| "grad_norm": 0.37147971987724304, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.4564, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6001687974742896, | |
| "grad_norm": 0.36486196517944336, | |
| "learning_rate": 2e-05, | |
| "loss": 0.451, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6501828639304804, | |
| "grad_norm": 0.39784976840019226, | |
| "learning_rate": 1.999576008468646e-05, | |
| "loss": 0.4313, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7001969303866713, | |
| "grad_norm": 0.4364568293094635, | |
| "learning_rate": 1.9983043934122208e-05, | |
| "loss": 0.4132, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7502109968428621, | |
| "grad_norm": 0.7991167306900024, | |
| "learning_rate": 1.9961862331387545e-05, | |
| "loss": 0.4103, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8002250632990529, | |
| "grad_norm": 0.3835698068141937, | |
| "learning_rate": 1.9932233238122834e-05, | |
| "loss": 0.3919, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8502391297552436, | |
| "grad_norm": 0.40175750851631165, | |
| "learning_rate": 1.9894181779297323e-05, | |
| "loss": 0.3846, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9002531962114345, | |
| "grad_norm": 0.3462737202644348, | |
| "learning_rate": 1.984774022190361e-05, | |
| "loss": 0.3712, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9502672626676253, | |
| "grad_norm": 0.31011104583740234, | |
| "learning_rate": 1.9792947947595772e-05, | |
| "loss": 0.3672, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.0002813291238162, | |
| "grad_norm": 0.34971198439598083, | |
| "learning_rate": 1.972985141929439e-05, | |
| "loss": 0.3635, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.050295395580007, | |
| "grad_norm": 0.3588427007198334, | |
| "learning_rate": 1.9658504141786775e-05, | |
| "loss": 0.3368, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.1003094620361977, | |
| "grad_norm": 0.32608523964881897, | |
| "learning_rate": 1.9578966616355823e-05, | |
| "loss": 0.3279, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.1503235284923885, | |
| "grad_norm": 0.3917510509490967, | |
| "learning_rate": 1.9491306289475957e-05, | |
| "loss": 0.3239, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.2003375949485793, | |
| "grad_norm": 0.3702155649662018, | |
| "learning_rate": 1.9395597495619634e-05, | |
| "loss": 0.3147, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.25035166140477, | |
| "grad_norm": 0.34320342540740967, | |
| "learning_rate": 1.9291921394223e-05, | |
| "loss": 0.3178, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.3003657278609608, | |
| "grad_norm": 0.38336554169654846, | |
| "learning_rate": 1.918036590086405e-05, | |
| "loss": 0.3195, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.3503797943171518, | |
| "grad_norm": 0.4073985517024994, | |
| "learning_rate": 1.90610256127117e-05, | |
| "loss": 0.308, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.4003938607733426, | |
| "grad_norm": 0.37673142552375793, | |
| "learning_rate": 1.8934001728309003e-05, | |
| "loss": 0.3096, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.4504079272295334, | |
| "grad_norm": 0.36145591735839844, | |
| "learning_rate": 1.8799401961758492e-05, | |
| "loss": 0.3121, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.5004219936857242, | |
| "grad_norm": 0.4171595275402069, | |
| "learning_rate": 1.865734045138245e-05, | |
| "loss": 0.3017, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.5511550123472226, | |
| "grad_norm": 0.32917237281799316, | |
| "learning_rate": 1.1144858589642251e-05, | |
| "loss": 0.2992, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.6011690788034134, | |
| "grad_norm": 0.3282073736190796, | |
| "learning_rate": 1.0564650370835772e-05, | |
| "loss": 0.293, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.6511831452596044, | |
| "grad_norm": 0.31431856751441956, | |
| "learning_rate": 9.982527302252135e-06, | |
| "loss": 0.2969, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.7011972117157952, | |
| "grad_norm": 0.3012774586677551, | |
| "learning_rate": 9.40046348731131e-06, | |
| "loss": 0.2954, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.751211278171986, | |
| "grad_norm": 0.2984926104545593, | |
| "learning_rate": 8.820432828491542e-06, | |
| "loss": 0.2885, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.8012253446281767, | |
| "grad_norm": 0.30490660667419434, | |
| "learning_rate": 8.244402333405252e-06, | |
| "loss": 0.2841, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.8512394110843675, | |
| "grad_norm": 0.3301903009414673, | |
| "learning_rate": 7.674325444256899e-06, | |
| "loss": 0.2935, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.9012534775405583, | |
| "grad_norm": 0.3045901954174042, | |
| "learning_rate": 7.112135413304042e-06, | |
| "loss": 0.2855, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.951267543996749, | |
| "grad_norm": 0.310871958732605, | |
| "learning_rate": 6.55973874678682e-06, | |
| "loss": 0.2831, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.00128161045294, | |
| "grad_norm": 0.3118513822555542, | |
| "learning_rate": 6.0190087395588596e-06, | |
| "loss": 0.2823, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0512956769091306, | |
| "grad_norm": 0.2910955250263214, | |
| "learning_rate": 5.491779122345093e-06, | |
| "loss": 0.2576, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.1013097433653214, | |
| "grad_norm": 0.3105012774467468, | |
| "learning_rate": 4.979837843169959e-06, | |
| "loss": 0.2522, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.151323809821512, | |
| "grad_norm": 0.31710630655288696, | |
| "learning_rate": 4.484921004044509e-06, | |
| "loss": 0.2546, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.201337876277703, | |
| "grad_norm": 0.41157081723213196, | |
| "learning_rate": 4.008706973474391e-06, | |
| "loss": 0.2531, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.2513519427338937, | |
| "grad_norm": 0.3166573941707611, | |
| "learning_rate": 3.5528106947544626e-06, | |
| "loss": 0.2519, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.301366009190085, | |
| "grad_norm": 0.3242489695549011, | |
| "learning_rate": 3.118778209351808e-06, | |
| "loss": 0.254, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.3513800756462757, | |
| "grad_norm": 0.34881240129470825, | |
| "learning_rate": 2.7080814139495402e-06, | |
| "loss": 0.2526, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.4013941421024665, | |
| "grad_norm": 0.3267311751842499, | |
| "learning_rate": 2.322113068931391e-06, | |
| "loss": 0.251, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.4514082085586573, | |
| "grad_norm": 0.3213905394077301, | |
| "learning_rate": 1.9621820752343324e-06, | |
| "loss": 0.246, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.501422275014848, | |
| "grad_norm": 0.48464563488960266, | |
| "learning_rate": 1.629509035586484e-06, | |
| "loss": 0.2527, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.551436341471039, | |
| "grad_norm": 0.31706663966178894, | |
| "learning_rate": 1.3252221151830513e-06, | |
| "loss": 0.2465, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.6014504079272296, | |
| "grad_norm": 0.33972597122192383, | |
| "learning_rate": 1.0503532158376584e-06, | |
| "loss": 0.2483, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.6514644743834204, | |
| "grad_norm": 0.2904072105884552, | |
| "learning_rate": 8.058344765833171e-07, | |
| "loss": 0.2465, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.701478540839611, | |
| "grad_norm": 0.32852381467819214, | |
| "learning_rate": 5.924951125902545e-07, | |
| "loss": 0.2512, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.751492607295802, | |
| "grad_norm": 0.2948131263256073, | |
| "learning_rate": 4.11058603120511e-07, | |
| "loss": 0.2483, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.8015066737519927, | |
| "grad_norm": 0.33766064047813416, | |
| "learning_rate": 2.6214023805552826e-07, | |
| "loss": 0.2496, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.8515207402081835, | |
| "grad_norm": 0.29850533604621887, | |
| "learning_rate": 1.462450313169983e-07, | |
| "loss": 0.2444, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.9015348066643742, | |
| "grad_norm": 0.33009734749794006, | |
| "learning_rate": 6.376600825699463e-08, | |
| "loss": 0.2478, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.951548873120565, | |
| "grad_norm": 0.5311033725738525, | |
| "learning_rate": 1.49828728252277e-08, | |
| "loss": 0.2412, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.9985620955893846, | |
| "step": 2997, | |
| "total_flos": 2.5672199949139968e+17, | |
| "train_loss": 0.13155911801694273, | |
| "train_runtime": 22482.9111, | |
| "train_samples_per_second": 8.537, | |
| "train_steps_per_second": 0.133 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 2997, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "total_flos": 2.5672199949139968e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |