{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "038f9418a1b642dcae983797720afdee": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a77615fc48ed4d97bdd324a2a1ce5517", "IPY_MODEL_edacb8c70ffe431ab02bf3f316674e74", "IPY_MODEL_55cc0de6656f4e7bb92a61ccfb40cfac" ], "layout": "IPY_MODEL_be22793949384f6c915eb431c772ea1c" } }, "a77615fc48ed4d97bdd324a2a1ce5517": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_49e1253a92bb4dd59025cd07f1c4139c", "placeholder": "​", "style": "IPY_MODEL_026d616290f24d1b8b7f90dcb3dfd5a3", "value": "model-00001-of-00004.safetensors: 100%" } }, "edacb8c70ffe431ab02bf3f316674e74": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_af284825caed4e85b7877adc9bb623d6", "max": 4976698672, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9764d10debe24acb8642158d5878cad0", "value": 4976698672 } }, "55cc0de6656f4e7bb92a61ccfb40cfac": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ab38498225a742c598eaa37cf6788c05", "placeholder": "​", "style": "IPY_MODEL_da4570460aae482abc163ec8275131fb", "value": " 4.98G/4.98G [04:28<00:00, 217MB/s]" } }, "be22793949384f6c915eb431c772ea1c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "49e1253a92bb4dd59025cd07f1c4139c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "026d616290f24d1b8b7f90dcb3dfd5a3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "af284825caed4e85b7877adc9bb623d6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9764d10debe24acb8642158d5878cad0": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "ab38498225a742c598eaa37cf6788c05": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "da4570460aae482abc163ec8275131fb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "95ea82397c9947ad9eaec08c89150061": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9ced7c8f0a7c421182b6c5c18e42008a", "IPY_MODEL_2d920152d93c4c7c85cef5096ddcd188", "IPY_MODEL_55d3f7b37a154a6d8e8325d0008af4f5" ], "layout": "IPY_MODEL_44b13bcb54404a048daaa8b12113fe10" } }, "9ced7c8f0a7c421182b6c5c18e42008a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9e35dc55dad34141811bf2cbae946702", "placeholder": "​", "style": "IPY_MODEL_246ea1a7c4e94ee592914314668ff68b", "value": "model-00002-of-00004.safetensors: 100%" } }, "2d920152d93c4c7c85cef5096ddcd188": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_31e16c31d03e4ec68666ed806533d66c", "max": 4999802720, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8a16684b5cea4655992df2f62513bd96", "value": 4999802720 } }, "55d3f7b37a154a6d8e8325d0008af4f5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_feaf88667a9a42ab98ef7d858ee3a5a7", "placeholder": "​", "style": "IPY_MODEL_f4eb3f443a5f4194a0cf92b8e6d1a2aa", "value": " 5.00G/5.00G [03:09<00:00, 20.6MB/s]" } }, "44b13bcb54404a048daaa8b12113fe10": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9e35dc55dad34141811bf2cbae946702": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "246ea1a7c4e94ee592914314668ff68b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "31e16c31d03e4ec68666ed806533d66c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8a16684b5cea4655992df2f62513bd96": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "feaf88667a9a42ab98ef7d858ee3a5a7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f4eb3f443a5f4194a0cf92b8e6d1a2aa": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3cae3c43b5a947fbaf62491a8750c27d": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_57ba4b93f3f145da8ada6fd6f400a55b", "IPY_MODEL_3912c175d4834d4e8e4daa29ec181ea6", "IPY_MODEL_2d7cb2dd6d744b159ffea8bbc321b172" ], "layout": "IPY_MODEL_399339a2b46248c083d255c7dfe6a53c" } }, "57ba4b93f3f145da8ada6fd6f400a55b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1edc550d76014b80bbb08375425e5529", "placeholder": "​", "style": "IPY_MODEL_6120c3b37aec428cb8e46a9e83e8f970", "value": "model-00003-of-00004.safetensors: 100%" } }, "3912c175d4834d4e8e4daa29ec181ea6": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b649fc3271da4d20b2f8a91277eff699", "max": 4915916176, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_0ca38babe0c9446fbf42c8e69398b020", "value": 4915916176 } }, "2d7cb2dd6d744b159ffea8bbc321b172": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5d3045cfb7f54a1e8930becfd8dd2234", "placeholder": "​", "style": "IPY_MODEL_df17a9b190b74c36be3ee8ca781712c5", "value": " 4.92G/4.92G [06:24<00:00, 32.8MB/s]" } }, "399339a2b46248c083d255c7dfe6a53c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1edc550d76014b80bbb08375425e5529": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6120c3b37aec428cb8e46a9e83e8f970": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b649fc3271da4d20b2f8a91277eff699": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0ca38babe0c9446fbf42c8e69398b020": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "5d3045cfb7f54a1e8930becfd8dd2234": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "df17a9b190b74c36be3ee8ca781712c5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5d784bc8265c433196cea6b6e9dcc527": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e31256ccfdcb4f40886732c0dd0d1664", "IPY_MODEL_baca6985490742f29f2c8e6bbc8ad1aa", "IPY_MODEL_5567210d612e4feab98db5aa262891df" ], "layout": "IPY_MODEL_d893c245a1344fc7afae63a4b365f689" } }, "e31256ccfdcb4f40886732c0dd0d1664": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b9fa8463d1894832b39d27bbcfb72249", "placeholder": "​", "style": "IPY_MODEL_d9dd6d0d77154ed59a7179adca0ec04d", "value": "model-00004-of-00004.safetensors: 100%" } }, "baca6985490742f29f2c8e6bbc8ad1aa": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dd213f00f50241b28afe6e610e6ea3ed", "max": 1168138808, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_32499c119f5e42a58b19d8f764bff65c", "value": 1168138808 } }, "5567210d612e4feab98db5aa262891df": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21877ae02d8b4c1eaf78a3115a4957bd", "placeholder": "​", "style": "IPY_MODEL_c534c9d6492f472fa2bddc4dd35a4ad4", "value": " 1.17G/1.17G [01:11<00:00, 15.9MB/s]" } }, "d893c245a1344fc7afae63a4b365f689": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b9fa8463d1894832b39d27bbcfb72249": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d9dd6d0d77154ed59a7179adca0ec04d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "dd213f00f50241b28afe6e610e6ea3ed": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "32499c119f5e42a58b19d8f764bff65c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "21877ae02d8b4c1eaf78a3115a4957bd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c534c9d6492f472fa2bddc4dd35a4ad4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oXhNRG2fq5Nf", "outputId": "84fcca58-5f6e-490b-cef4-1c2a3e4b84f4" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.12/dist-packages (0.48.2)\n", "Requirement already satisfied: accelerate in /usr/local/lib/python3.12/dist-packages (1.12.0)\n", "Requirement already satisfied: xformers==0.0.33.post1 in /usr/local/lib/python3.12/dist-packages (0.0.33.post1)\n", "Requirement already satisfied: peft in /usr/local/lib/python3.12/dist-packages (0.18.0)\n", "Requirement already satisfied: trl in /usr/local/lib/python3.12/dist-packages (0.25.1)\n", "Requirement already satisfied: triton in /usr/local/lib/python3.12/dist-packages (3.5.0)\n", "Requirement already satisfied: cut_cross_entropy in /usr/local/lib/python3.12/dist-packages (25.1.1)\n", "Requirement already satisfied: unsloth_zoo in /usr/local/lib/python3.12/dist-packages (2025.11.5)\n", "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.12/dist-packages (0.2.1)\n", "Requirement already satisfied: protobuf in /usr/local/lib/python3.12/dist-packages (5.29.5)\n", "Requirement already satisfied: datasets==4.3.0 in /usr/local/lib/python3.12/dist-packages (4.3.0)\n", "Requirement already satisfied: huggingface_hub>=0.34.0 in /usr/local/lib/python3.12/dist-packages (0.36.0)\n", "Requirement already satisfied: hf_transfer in /usr/local/lib/python3.12/dist-packages (0.1.9)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (3.20.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (2.0.2)\n", "Requirement already satisfied: pyarrow>=21.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (22.0.0)\n", "Requirement already satisfied: dill<0.4.1,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (0.3.8)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (2.2.2)\n", "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (2.32.4)\n", "Requirement already satisfied: httpx<1.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (0.28.1)\n", "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (4.67.1)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (3.6.0)\n", "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (0.70.16)\n", "Requirement already satisfied: fsspec<=2025.9.0,>=2023.1.0 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (2025.3.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (25.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (6.0.3)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub>=0.34.0) (4.15.0)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub>=0.34.0) (1.2.0)\n", "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (3.13.2)\n", "Requirement already satisfied: anyio in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (4.11.0)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (2025.11.12)\n", "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (1.0.9)\n", "Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (3.11)\n", "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.12/dist-packages (from httpcore==1.*->httpx<1.0.0->datasets==4.3.0) (0.16.0)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets==4.3.0) (3.4.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets==4.3.0) (2.5.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets==4.3.0) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets==4.3.0) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets==4.3.0) (2025.2)\n", "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (2.6.1)\n", "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (1.4.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (25.4.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (1.8.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (6.7.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (0.4.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (1.22.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets==4.3.0) (1.17.0)\n", "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.12/dist-packages (from anyio->httpx<1.0.0->datasets==4.3.0) (1.3.1)\n", "Requirement already satisfied: unsloth in /usr/local/lib/python3.12/dist-packages (2025.11.4)\n" ] } ], "source": [ "import os, re\n", "\n", "import torch; v = re.match(r\"[0-9]{1,}\\.[0-9]{1,}\", str(torch.__version__)).group(0)\n", "xformers = \"xformers==\" + (\"0.0.33.post1\" if v==\"2.9\" else \"0.0.32.post2\" if v==\"2.8\" else \"0.0.29.post3\")\n", "!pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo\n", "!pip install sentencepiece protobuf \"datasets==4.3.0\" \"huggingface_hub>=0.34.0\" hf_transfer\n", "!pip install --no-deps unsloth" ] }, { "cell_type": "code", "source": [ "import unsloth\n", "from unsloth import FastLanguageModel\n", "import torch\n", "\n", "max_seq_length = 2048\n", "dtype = None\n", "load_in_4bit = True\n", "model_name = \"unsloth/Meta-Llama-3.1-8B-bnb-4bit\"\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = model_name,\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", ")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rHVeLF3Jtlfd", "outputId": "c1eafe1e-ac46-4711-8453-a5b05c6e5b0d" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "🦥 Unsloth Zoo will now patch everything to make training faster!\n", "==((====))== Unsloth 2025.11.4: Fast Llama patching. Transformers: 4.57.2.\n", " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", "O^O/ \\_/ \\ Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0\n", "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]\n", " \"-____-\" Free license: http://github.com/unslothai/unsloth\n", "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" ] } ] }, { "cell_type": "code", "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, # Supports any, but = 0 is optimized\n", " bias = \"none\", # Supports any, but = \"none\" is optimized\n", " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", " random_state = 61, # Trabzon Plakası\n", " use_rslora = False, # We support rank stabilized LoRA\n", " loftq_config = None, # And LoftQ\n", ")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "P3OKXPPNtuSg", "outputId": "708e7805-423b-4982-d224-a663f0065bd2" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Unsloth 2025.11.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ] }, { "cell_type": "code", "source": [ "qa_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", "\n", "### Instruction:\n", "Answer the user's question accurately based *only* on the provided context.\n", "\n", "### Context:\n", "{}\n", "\n", "### Question:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\" # bunu daha guzel de yazabiliriz kafamdan salladim\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " queries = examples[\"query\"]\n", " original_questions = examples[\"original_question\"]\n", " responses = examples[\"response\"]\n", " texts = []\n", " for query, original_question, response in zip(queries, original_questions, responses):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = qa_prompt.format(query, original_question, response) + EOS_TOKEN\n", " texts.append(text)\n", " return texts\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"meta-math/MetaMathQA\", split = \"train\")" ], "metadata": { "id": "3wjkkNvM0Y71" }, "execution_count": 7, "outputs": [] }, { "cell_type": "code", "source": [ "shuffled_dataset = dataset.shuffle(seed=42)\n", "random_dataset = shuffled_dataset.select(range(50000))\n", "test_dataset = shuffled_dataset.select(range(50000,62500)) # %80 = 50k, %20 = 12.5k" ], "metadata": { "id": "S3o5ObKlxZdk" }, "execution_count": 8, "outputs": [] }, { "cell_type": "code", "source": [ "from trl import SFTConfig, SFTTrainer\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = random_dataset,\n", " formatting_func = formatting_prompts_func,\n", " max_seq_length = max_seq_length,\n", " packing = False,\n", " args = SFTConfig(\n", " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " num_train_epochs = 5,\n", " max_steps = 50,\n", " learning_rate = 5e-5,\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.001,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"outputs\",\n", " report_to = \"none\",\n", " ),\n", ")" ], "metadata": { "id": "bVXWwFNCzg1Z" }, "execution_count": 9, "outputs": [] }, { "cell_type": "code", "source": [ "trainer_stats = trainer.train()" ], "metadata": { "id": "0LK85tTpzxos", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "c6890db6-6342-41d5-a554-f738443866e8" }, "execution_count": 10, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "The model is already on multiple devices. Skipping the move to device specified in `args`.\n", "==((====))== Unsloth - 2x faster free finetuning | Num GPUs used = 1\n", " \\\\ /| Num examples = 50,000 | Num Epochs = 1 | Total steps = 50\n", "O^O/ \\_/ \\ Batch size per device = 8 | Gradient accumulation steps = 4\n", "\\ / Data Parallel GPUs = 1 | Total batch size (8 x 4 x 1) = 32\n", " \"-____-\" Trainable parameters = 83,886,080 of 8,114,147,328 (1.03% trained)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Unsloth: Will smartly offload gradients to save VRAM!\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", "
\n", " \n", " \n", " [50/50 37:05, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
11.230200
21.270300
31.299600
41.244500
51.184100
61.111100
71.223400
81.108700
91.065300
101.053800
110.964000
120.902800
130.931900
140.898900
150.874400
160.798700
170.817200
180.772500
190.732200
200.722500
210.668200
220.647300
230.637800
240.636000
250.659600
260.617000
270.650700
280.622400
290.644100
300.623500
310.566500
320.608200
330.593700
340.609600
350.579500
360.597300
370.562200
380.592100
390.562600
400.567200
410.538800
420.590200
430.561600
440.570400
450.529900
460.577500
470.591800
480.585600
490.564800
500.551400

" ] }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", "inputs = tokenizer(\n", "[\n", " qa_prompt.format(\n", " \"What is 3+8?\",\n", " \"What is 3+8?\",\n", " \"\",\n", " )\n", "], return_tensors = \"pt\").to(\"cuda\")\n", "\n", "outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)\n", "tokenizer.batch_decode(outputs)" ], "metadata": { "id": "a-xjHIy269KP", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "122559de-ac3e-4e20-d0d3-8ce7e89fd395" }, "execution_count": 11, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[\"<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nAnswer the user's question accurately based *only* on the provided context.\\n\\n### Context:\\nWhat is 3+8?\\n\\n### Question:\\nWhat is 3+8?\\n\\n### Response:\\nThe answer is 11.\\nThe sum of 3 and 8 is 11.\\n#### 3+8=11\\nThe answer is: 11<|end_of_text|>\"]" ] }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "code", "source": [ "model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 413, "referenced_widgets": [ "038f9418a1b642dcae983797720afdee", "a77615fc48ed4d97bdd324a2a1ce5517", "edacb8c70ffe431ab02bf3f316674e74", "55cc0de6656f4e7bb92a61ccfb40cfac", "be22793949384f6c915eb431c772ea1c", "49e1253a92bb4dd59025cd07f1c4139c", "026d616290f24d1b8b7f90dcb3dfd5a3", "af284825caed4e85b7877adc9bb623d6", "9764d10debe24acb8642158d5878cad0", "ab38498225a742c598eaa37cf6788c05", "da4570460aae482abc163ec8275131fb", "95ea82397c9947ad9eaec08c89150061", "9ced7c8f0a7c421182b6c5c18e42008a", "2d920152d93c4c7c85cef5096ddcd188", "55d3f7b37a154a6d8e8325d0008af4f5", "44b13bcb54404a048daaa8b12113fe10", "9e35dc55dad34141811bf2cbae946702", "246ea1a7c4e94ee592914314668ff68b", "31e16c31d03e4ec68666ed806533d66c", "8a16684b5cea4655992df2f62513bd96", "feaf88667a9a42ab98ef7d858ee3a5a7", "f4eb3f443a5f4194a0cf92b8e6d1a2aa", "3cae3c43b5a947fbaf62491a8750c27d", "57ba4b93f3f145da8ada6fd6f400a55b", "3912c175d4834d4e8e4daa29ec181ea6", "2d7cb2dd6d744b159ffea8bbc321b172", "399339a2b46248c083d255c7dfe6a53c", "1edc550d76014b80bbb08375425e5529", "6120c3b37aec428cb8e46a9e83e8f970", "b649fc3271da4d20b2f8a91277eff699", "0ca38babe0c9446fbf42c8e69398b020", "5d3045cfb7f54a1e8930becfd8dd2234", "df17a9b190b74c36be3ee8ca781712c5", "5d784bc8265c433196cea6b6e9dcc527", "e31256ccfdcb4f40886732c0dd0d1664", "baca6985490742f29f2c8e6bbc8ad1aa", "5567210d612e4feab98db5aa262891df", "d893c245a1344fc7afae63a4b365f689", "b9fa8463d1894832b39d27bbcfb72249", "d9dd6d0d77154ed59a7179adca0ec04d", "dd213f00f50241b28afe6e610e6ea3ed", "32499c119f5e42a58b19d8f764bff65c", "21877ae02d8b4c1eaf78a3115a4957bd", "c534c9d6492f472fa2bddc4dd35a4ad4" ] }, "id": "UPOtyS1c7q6f", "outputId": "8de042b2-d8c9-4723-bf51-ca391af8e482" }, "execution_count": 12, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Found HuggingFace hub cache directory: /root/.cache/huggingface/hub\n", "Checking cache directory for required files...\n", "Cache check failed: model-00001-of-00004.safetensors not found in local cache.\n", "Not all required files found in cache. Will proceed with downloading.\n", "Checking cache directory for required files...\n", "Cache check failed: tokenizer.model not found in local cache.\n", "Not all required files found in cache. Will proceed with downloading.\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\rUnsloth: Preparing safetensor model files: 0%| | 0/4 [00:00