UDoruk3250 commited on
Commit
366db75
Β·
verified Β·
1 Parent(s): 7c6caff

Upload llama_fine_tuning.ipynb

Browse files
Files changed (1) hide show
  1. llama_fine_tuning.ipynb +2230 -0
llama_fine_tuning.ipynb ADDED
@@ -0,0 +1,2230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU",
17
+ "widgets": {
18
+ "application/vnd.jupyter.widget-state+json": {
19
+ "038f9418a1b642dcae983797720afdee": {
20
+ "model_module": "@jupyter-widgets/controls",
21
+ "model_name": "HBoxModel",
22
+ "model_module_version": "1.5.0",
23
+ "state": {
24
+ "_dom_classes": [],
25
+ "_model_module": "@jupyter-widgets/controls",
26
+ "_model_module_version": "1.5.0",
27
+ "_model_name": "HBoxModel",
28
+ "_view_count": null,
29
+ "_view_module": "@jupyter-widgets/controls",
30
+ "_view_module_version": "1.5.0",
31
+ "_view_name": "HBoxView",
32
+ "box_style": "",
33
+ "children": [
34
+ "IPY_MODEL_a77615fc48ed4d97bdd324a2a1ce5517",
35
+ "IPY_MODEL_edacb8c70ffe431ab02bf3f316674e74",
36
+ "IPY_MODEL_55cc0de6656f4e7bb92a61ccfb40cfac"
37
+ ],
38
+ "layout": "IPY_MODEL_be22793949384f6c915eb431c772ea1c"
39
+ }
40
+ },
41
+ "a77615fc48ed4d97bdd324a2a1ce5517": {
42
+ "model_module": "@jupyter-widgets/controls",
43
+ "model_name": "HTMLModel",
44
+ "model_module_version": "1.5.0",
45
+ "state": {
46
+ "_dom_classes": [],
47
+ "_model_module": "@jupyter-widgets/controls",
48
+ "_model_module_version": "1.5.0",
49
+ "_model_name": "HTMLModel",
50
+ "_view_count": null,
51
+ "_view_module": "@jupyter-widgets/controls",
52
+ "_view_module_version": "1.5.0",
53
+ "_view_name": "HTMLView",
54
+ "description": "",
55
+ "description_tooltip": null,
56
+ "layout": "IPY_MODEL_49e1253a92bb4dd59025cd07f1c4139c",
57
+ "placeholder": "​",
58
+ "style": "IPY_MODEL_026d616290f24d1b8b7f90dcb3dfd5a3",
59
+ "value": "model-00001-of-00004.safetensors: 100%"
60
+ }
61
+ },
62
+ "edacb8c70ffe431ab02bf3f316674e74": {
63
+ "model_module": "@jupyter-widgets/controls",
64
+ "model_name": "FloatProgressModel",
65
+ "model_module_version": "1.5.0",
66
+ "state": {
67
+ "_dom_classes": [],
68
+ "_model_module": "@jupyter-widgets/controls",
69
+ "_model_module_version": "1.5.0",
70
+ "_model_name": "FloatProgressModel",
71
+ "_view_count": null,
72
+ "_view_module": "@jupyter-widgets/controls",
73
+ "_view_module_version": "1.5.0",
74
+ "_view_name": "ProgressView",
75
+ "bar_style": "success",
76
+ "description": "",
77
+ "description_tooltip": null,
78
+ "layout": "IPY_MODEL_af284825caed4e85b7877adc9bb623d6",
79
+ "max": 4976698672,
80
+ "min": 0,
81
+ "orientation": "horizontal",
82
+ "style": "IPY_MODEL_9764d10debe24acb8642158d5878cad0",
83
+ "value": 4976698672
84
+ }
85
+ },
86
+ "55cc0de6656f4e7bb92a61ccfb40cfac": {
87
+ "model_module": "@jupyter-widgets/controls",
88
+ "model_name": "HTMLModel",
89
+ "model_module_version": "1.5.0",
90
+ "state": {
91
+ "_dom_classes": [],
92
+ "_model_module": "@jupyter-widgets/controls",
93
+ "_model_module_version": "1.5.0",
94
+ "_model_name": "HTMLModel",
95
+ "_view_count": null,
96
+ "_view_module": "@jupyter-widgets/controls",
97
+ "_view_module_version": "1.5.0",
98
+ "_view_name": "HTMLView",
99
+ "description": "",
100
+ "description_tooltip": null,
101
+ "layout": "IPY_MODEL_ab38498225a742c598eaa37cf6788c05",
102
+ "placeholder": "​",
103
+ "style": "IPY_MODEL_da4570460aae482abc163ec8275131fb",
104
+ "value": " 4.98G/4.98G [04:28<00:00, 217MB/s]"
105
+ }
106
+ },
107
+ "be22793949384f6c915eb431c772ea1c": {
108
+ "model_module": "@jupyter-widgets/base",
109
+ "model_name": "LayoutModel",
110
+ "model_module_version": "1.2.0",
111
+ "state": {
112
+ "_model_module": "@jupyter-widgets/base",
113
+ "_model_module_version": "1.2.0",
114
+ "_model_name": "LayoutModel",
115
+ "_view_count": null,
116
+ "_view_module": "@jupyter-widgets/base",
117
+ "_view_module_version": "1.2.0",
118
+ "_view_name": "LayoutView",
119
+ "align_content": null,
120
+ "align_items": null,
121
+ "align_self": null,
122
+ "border": null,
123
+ "bottom": null,
124
+ "display": null,
125
+ "flex": null,
126
+ "flex_flow": null,
127
+ "grid_area": null,
128
+ "grid_auto_columns": null,
129
+ "grid_auto_flow": null,
130
+ "grid_auto_rows": null,
131
+ "grid_column": null,
132
+ "grid_gap": null,
133
+ "grid_row": null,
134
+ "grid_template_areas": null,
135
+ "grid_template_columns": null,
136
+ "grid_template_rows": null,
137
+ "height": null,
138
+ "justify_content": null,
139
+ "justify_items": null,
140
+ "left": null,
141
+ "margin": null,
142
+ "max_height": null,
143
+ "max_width": null,
144
+ "min_height": null,
145
+ "min_width": null,
146
+ "object_fit": null,
147
+ "object_position": null,
148
+ "order": null,
149
+ "overflow": null,
150
+ "overflow_x": null,
151
+ "overflow_y": null,
152
+ "padding": null,
153
+ "right": null,
154
+ "top": null,
155
+ "visibility": null,
156
+ "width": null
157
+ }
158
+ },
159
+ "49e1253a92bb4dd59025cd07f1c4139c": {
160
+ "model_module": "@jupyter-widgets/base",
161
+ "model_name": "LayoutModel",
162
+ "model_module_version": "1.2.0",
163
+ "state": {
164
+ "_model_module": "@jupyter-widgets/base",
165
+ "_model_module_version": "1.2.0",
166
+ "_model_name": "LayoutModel",
167
+ "_view_count": null,
168
+ "_view_module": "@jupyter-widgets/base",
169
+ "_view_module_version": "1.2.0",
170
+ "_view_name": "LayoutView",
171
+ "align_content": null,
172
+ "align_items": null,
173
+ "align_self": null,
174
+ "border": null,
175
+ "bottom": null,
176
+ "display": null,
177
+ "flex": null,
178
+ "flex_flow": null,
179
+ "grid_area": null,
180
+ "grid_auto_columns": null,
181
+ "grid_auto_flow": null,
182
+ "grid_auto_rows": null,
183
+ "grid_column": null,
184
+ "grid_gap": null,
185
+ "grid_row": null,
186
+ "grid_template_areas": null,
187
+ "grid_template_columns": null,
188
+ "grid_template_rows": null,
189
+ "height": null,
190
+ "justify_content": null,
191
+ "justify_items": null,
192
+ "left": null,
193
+ "margin": null,
194
+ "max_height": null,
195
+ "max_width": null,
196
+ "min_height": null,
197
+ "min_width": null,
198
+ "object_fit": null,
199
+ "object_position": null,
200
+ "order": null,
201
+ "overflow": null,
202
+ "overflow_x": null,
203
+ "overflow_y": null,
204
+ "padding": null,
205
+ "right": null,
206
+ "top": null,
207
+ "visibility": null,
208
+ "width": null
209
+ }
210
+ },
211
+ "026d616290f24d1b8b7f90dcb3dfd5a3": {
212
+ "model_module": "@jupyter-widgets/controls",
213
+ "model_name": "DescriptionStyleModel",
214
+ "model_module_version": "1.5.0",
215
+ "state": {
216
+ "_model_module": "@jupyter-widgets/controls",
217
+ "_model_module_version": "1.5.0",
218
+ "_model_name": "DescriptionStyleModel",
219
+ "_view_count": null,
220
+ "_view_module": "@jupyter-widgets/base",
221
+ "_view_module_version": "1.2.0",
222
+ "_view_name": "StyleView",
223
+ "description_width": ""
224
+ }
225
+ },
226
+ "af284825caed4e85b7877adc9bb623d6": {
227
+ "model_module": "@jupyter-widgets/base",
228
+ "model_name": "LayoutModel",
229
+ "model_module_version": "1.2.0",
230
+ "state": {
231
+ "_model_module": "@jupyter-widgets/base",
232
+ "_model_module_version": "1.2.0",
233
+ "_model_name": "LayoutModel",
234
+ "_view_count": null,
235
+ "_view_module": "@jupyter-widgets/base",
236
+ "_view_module_version": "1.2.0",
237
+ "_view_name": "LayoutView",
238
+ "align_content": null,
239
+ "align_items": null,
240
+ "align_self": null,
241
+ "border": null,
242
+ "bottom": null,
243
+ "display": null,
244
+ "flex": null,
245
+ "flex_flow": null,
246
+ "grid_area": null,
247
+ "grid_auto_columns": null,
248
+ "grid_auto_flow": null,
249
+ "grid_auto_rows": null,
250
+ "grid_column": null,
251
+ "grid_gap": null,
252
+ "grid_row": null,
253
+ "grid_template_areas": null,
254
+ "grid_template_columns": null,
255
+ "grid_template_rows": null,
256
+ "height": null,
257
+ "justify_content": null,
258
+ "justify_items": null,
259
+ "left": null,
260
+ "margin": null,
261
+ "max_height": null,
262
+ "max_width": null,
263
+ "min_height": null,
264
+ "min_width": null,
265
+ "object_fit": null,
266
+ "object_position": null,
267
+ "order": null,
268
+ "overflow": null,
269
+ "overflow_x": null,
270
+ "overflow_y": null,
271
+ "padding": null,
272
+ "right": null,
273
+ "top": null,
274
+ "visibility": null,
275
+ "width": null
276
+ }
277
+ },
278
+ "9764d10debe24acb8642158d5878cad0": {
279
+ "model_module": "@jupyter-widgets/controls",
280
+ "model_name": "ProgressStyleModel",
281
+ "model_module_version": "1.5.0",
282
+ "state": {
283
+ "_model_module": "@jupyter-widgets/controls",
284
+ "_model_module_version": "1.5.0",
285
+ "_model_name": "ProgressStyleModel",
286
+ "_view_count": null,
287
+ "_view_module": "@jupyter-widgets/base",
288
+ "_view_module_version": "1.2.0",
289
+ "_view_name": "StyleView",
290
+ "bar_color": null,
291
+ "description_width": ""
292
+ }
293
+ },
294
+ "ab38498225a742c598eaa37cf6788c05": {
295
+ "model_module": "@jupyter-widgets/base",
296
+ "model_name": "LayoutModel",
297
+ "model_module_version": "1.2.0",
298
+ "state": {
299
+ "_model_module": "@jupyter-widgets/base",
300
+ "_model_module_version": "1.2.0",
301
+ "_model_name": "LayoutModel",
302
+ "_view_count": null,
303
+ "_view_module": "@jupyter-widgets/base",
304
+ "_view_module_version": "1.2.0",
305
+ "_view_name": "LayoutView",
306
+ "align_content": null,
307
+ "align_items": null,
308
+ "align_self": null,
309
+ "border": null,
310
+ "bottom": null,
311
+ "display": null,
312
+ "flex": null,
313
+ "flex_flow": null,
314
+ "grid_area": null,
315
+ "grid_auto_columns": null,
316
+ "grid_auto_flow": null,
317
+ "grid_auto_rows": null,
318
+ "grid_column": null,
319
+ "grid_gap": null,
320
+ "grid_row": null,
321
+ "grid_template_areas": null,
322
+ "grid_template_columns": null,
323
+ "grid_template_rows": null,
324
+ "height": null,
325
+ "justify_content": null,
326
+ "justify_items": null,
327
+ "left": null,
328
+ "margin": null,
329
+ "max_height": null,
330
+ "max_width": null,
331
+ "min_height": null,
332
+ "min_width": null,
333
+ "object_fit": null,
334
+ "object_position": null,
335
+ "order": null,
336
+ "overflow": null,
337
+ "overflow_x": null,
338
+ "overflow_y": null,
339
+ "padding": null,
340
+ "right": null,
341
+ "top": null,
342
+ "visibility": null,
343
+ "width": null
344
+ }
345
+ },
346
+ "da4570460aae482abc163ec8275131fb": {
347
+ "model_module": "@jupyter-widgets/controls",
348
+ "model_name": "DescriptionStyleModel",
349
+ "model_module_version": "1.5.0",
350
+ "state": {
351
+ "_model_module": "@jupyter-widgets/controls",
352
+ "_model_module_version": "1.5.0",
353
+ "_model_name": "DescriptionStyleModel",
354
+ "_view_count": null,
355
+ "_view_module": "@jupyter-widgets/base",
356
+ "_view_module_version": "1.2.0",
357
+ "_view_name": "StyleView",
358
+ "description_width": ""
359
+ }
360
+ },
361
+ "95ea82397c9947ad9eaec08c89150061": {
362
+ "model_module": "@jupyter-widgets/controls",
363
+ "model_name": "HBoxModel",
364
+ "model_module_version": "1.5.0",
365
+ "state": {
366
+ "_dom_classes": [],
367
+ "_model_module": "@jupyter-widgets/controls",
368
+ "_model_module_version": "1.5.0",
369
+ "_model_name": "HBoxModel",
370
+ "_view_count": null,
371
+ "_view_module": "@jupyter-widgets/controls",
372
+ "_view_module_version": "1.5.0",
373
+ "_view_name": "HBoxView",
374
+ "box_style": "",
375
+ "children": [
376
+ "IPY_MODEL_9ced7c8f0a7c421182b6c5c18e42008a",
377
+ "IPY_MODEL_2d920152d93c4c7c85cef5096ddcd188",
378
+ "IPY_MODEL_55d3f7b37a154a6d8e8325d0008af4f5"
379
+ ],
380
+ "layout": "IPY_MODEL_44b13bcb54404a048daaa8b12113fe10"
381
+ }
382
+ },
383
+ "9ced7c8f0a7c421182b6c5c18e42008a": {
384
+ "model_module": "@jupyter-widgets/controls",
385
+ "model_name": "HTMLModel",
386
+ "model_module_version": "1.5.0",
387
+ "state": {
388
+ "_dom_classes": [],
389
+ "_model_module": "@jupyter-widgets/controls",
390
+ "_model_module_version": "1.5.0",
391
+ "_model_name": "HTMLModel",
392
+ "_view_count": null,
393
+ "_view_module": "@jupyter-widgets/controls",
394
+ "_view_module_version": "1.5.0",
395
+ "_view_name": "HTMLView",
396
+ "description": "",
397
+ "description_tooltip": null,
398
+ "layout": "IPY_MODEL_9e35dc55dad34141811bf2cbae946702",
399
+ "placeholder": "​",
400
+ "style": "IPY_MODEL_246ea1a7c4e94ee592914314668ff68b",
401
+ "value": "model-00002-of-00004.safetensors: 100%"
402
+ }
403
+ },
404
+ "2d920152d93c4c7c85cef5096ddcd188": {
405
+ "model_module": "@jupyter-widgets/controls",
406
+ "model_name": "FloatProgressModel",
407
+ "model_module_version": "1.5.0",
408
+ "state": {
409
+ "_dom_classes": [],
410
+ "_model_module": "@jupyter-widgets/controls",
411
+ "_model_module_version": "1.5.0",
412
+ "_model_name": "FloatProgressModel",
413
+ "_view_count": null,
414
+ "_view_module": "@jupyter-widgets/controls",
415
+ "_view_module_version": "1.5.0",
416
+ "_view_name": "ProgressView",
417
+ "bar_style": "success",
418
+ "description": "",
419
+ "description_tooltip": null,
420
+ "layout": "IPY_MODEL_31e16c31d03e4ec68666ed806533d66c",
421
+ "max": 4999802720,
422
+ "min": 0,
423
+ "orientation": "horizontal",
424
+ "style": "IPY_MODEL_8a16684b5cea4655992df2f62513bd96",
425
+ "value": 4999802720
426
+ }
427
+ },
428
+ "55d3f7b37a154a6d8e8325d0008af4f5": {
429
+ "model_module": "@jupyter-widgets/controls",
430
+ "model_name": "HTMLModel",
431
+ "model_module_version": "1.5.0",
432
+ "state": {
433
+ "_dom_classes": [],
434
+ "_model_module": "@jupyter-widgets/controls",
435
+ "_model_module_version": "1.5.0",
436
+ "_model_name": "HTMLModel",
437
+ "_view_count": null,
438
+ "_view_module": "@jupyter-widgets/controls",
439
+ "_view_module_version": "1.5.0",
440
+ "_view_name": "HTMLView",
441
+ "description": "",
442
+ "description_tooltip": null,
443
+ "layout": "IPY_MODEL_feaf88667a9a42ab98ef7d858ee3a5a7",
444
+ "placeholder": "​",
445
+ "style": "IPY_MODEL_f4eb3f443a5f4194a0cf92b8e6d1a2aa",
446
+ "value": " 5.00G/5.00G [03:09<00:00, 20.6MB/s]"
447
+ }
448
+ },
449
+ "44b13bcb54404a048daaa8b12113fe10": {
450
+ "model_module": "@jupyter-widgets/base",
451
+ "model_name": "LayoutModel",
452
+ "model_module_version": "1.2.0",
453
+ "state": {
454
+ "_model_module": "@jupyter-widgets/base",
455
+ "_model_module_version": "1.2.0",
456
+ "_model_name": "LayoutModel",
457
+ "_view_count": null,
458
+ "_view_module": "@jupyter-widgets/base",
459
+ "_view_module_version": "1.2.0",
460
+ "_view_name": "LayoutView",
461
+ "align_content": null,
462
+ "align_items": null,
463
+ "align_self": null,
464
+ "border": null,
465
+ "bottom": null,
466
+ "display": null,
467
+ "flex": null,
468
+ "flex_flow": null,
469
+ "grid_area": null,
470
+ "grid_auto_columns": null,
471
+ "grid_auto_flow": null,
472
+ "grid_auto_rows": null,
473
+ "grid_column": null,
474
+ "grid_gap": null,
475
+ "grid_row": null,
476
+ "grid_template_areas": null,
477
+ "grid_template_columns": null,
478
+ "grid_template_rows": null,
479
+ "height": null,
480
+ "justify_content": null,
481
+ "justify_items": null,
482
+ "left": null,
483
+ "margin": null,
484
+ "max_height": null,
485
+ "max_width": null,
486
+ "min_height": null,
487
+ "min_width": null,
488
+ "object_fit": null,
489
+ "object_position": null,
490
+ "order": null,
491
+ "overflow": null,
492
+ "overflow_x": null,
493
+ "overflow_y": null,
494
+ "padding": null,
495
+ "right": null,
496
+ "top": null,
497
+ "visibility": null,
498
+ "width": null
499
+ }
500
+ },
501
+ "9e35dc55dad34141811bf2cbae946702": {
502
+ "model_module": "@jupyter-widgets/base",
503
+ "model_name": "LayoutModel",
504
+ "model_module_version": "1.2.0",
505
+ "state": {
506
+ "_model_module": "@jupyter-widgets/base",
507
+ "_model_module_version": "1.2.0",
508
+ "_model_name": "LayoutModel",
509
+ "_view_count": null,
510
+ "_view_module": "@jupyter-widgets/base",
511
+ "_view_module_version": "1.2.0",
512
+ "_view_name": "LayoutView",
513
+ "align_content": null,
514
+ "align_items": null,
515
+ "align_self": null,
516
+ "border": null,
517
+ "bottom": null,
518
+ "display": null,
519
+ "flex": null,
520
+ "flex_flow": null,
521
+ "grid_area": null,
522
+ "grid_auto_columns": null,
523
+ "grid_auto_flow": null,
524
+ "grid_auto_rows": null,
525
+ "grid_column": null,
526
+ "grid_gap": null,
527
+ "grid_row": null,
528
+ "grid_template_areas": null,
529
+ "grid_template_columns": null,
530
+ "grid_template_rows": null,
531
+ "height": null,
532
+ "justify_content": null,
533
+ "justify_items": null,
534
+ "left": null,
535
+ "margin": null,
536
+ "max_height": null,
537
+ "max_width": null,
538
+ "min_height": null,
539
+ "min_width": null,
540
+ "object_fit": null,
541
+ "object_position": null,
542
+ "order": null,
543
+ "overflow": null,
544
+ "overflow_x": null,
545
+ "overflow_y": null,
546
+ "padding": null,
547
+ "right": null,
548
+ "top": null,
549
+ "visibility": null,
550
+ "width": null
551
+ }
552
+ },
553
+ "246ea1a7c4e94ee592914314668ff68b": {
554
+ "model_module": "@jupyter-widgets/controls",
555
+ "model_name": "DescriptionStyleModel",
556
+ "model_module_version": "1.5.0",
557
+ "state": {
558
+ "_model_module": "@jupyter-widgets/controls",
559
+ "_model_module_version": "1.5.0",
560
+ "_model_name": "DescriptionStyleModel",
561
+ "_view_count": null,
562
+ "_view_module": "@jupyter-widgets/base",
563
+ "_view_module_version": "1.2.0",
564
+ "_view_name": "StyleView",
565
+ "description_width": ""
566
+ }
567
+ },
568
+ "31e16c31d03e4ec68666ed806533d66c": {
569
+ "model_module": "@jupyter-widgets/base",
570
+ "model_name": "LayoutModel",
571
+ "model_module_version": "1.2.0",
572
+ "state": {
573
+ "_model_module": "@jupyter-widgets/base",
574
+ "_model_module_version": "1.2.0",
575
+ "_model_name": "LayoutModel",
576
+ "_view_count": null,
577
+ "_view_module": "@jupyter-widgets/base",
578
+ "_view_module_version": "1.2.0",
579
+ "_view_name": "LayoutView",
580
+ "align_content": null,
581
+ "align_items": null,
582
+ "align_self": null,
583
+ "border": null,
584
+ "bottom": null,
585
+ "display": null,
586
+ "flex": null,
587
+ "flex_flow": null,
588
+ "grid_area": null,
589
+ "grid_auto_columns": null,
590
+ "grid_auto_flow": null,
591
+ "grid_auto_rows": null,
592
+ "grid_column": null,
593
+ "grid_gap": null,
594
+ "grid_row": null,
595
+ "grid_template_areas": null,
596
+ "grid_template_columns": null,
597
+ "grid_template_rows": null,
598
+ "height": null,
599
+ "justify_content": null,
600
+ "justify_items": null,
601
+ "left": null,
602
+ "margin": null,
603
+ "max_height": null,
604
+ "max_width": null,
605
+ "min_height": null,
606
+ "min_width": null,
607
+ "object_fit": null,
608
+ "object_position": null,
609
+ "order": null,
610
+ "overflow": null,
611
+ "overflow_x": null,
612
+ "overflow_y": null,
613
+ "padding": null,
614
+ "right": null,
615
+ "top": null,
616
+ "visibility": null,
617
+ "width": null
618
+ }
619
+ },
620
+ "8a16684b5cea4655992df2f62513bd96": {
621
+ "model_module": "@jupyter-widgets/controls",
622
+ "model_name": "ProgressStyleModel",
623
+ "model_module_version": "1.5.0",
624
+ "state": {
625
+ "_model_module": "@jupyter-widgets/controls",
626
+ "_model_module_version": "1.5.0",
627
+ "_model_name": "ProgressStyleModel",
628
+ "_view_count": null,
629
+ "_view_module": "@jupyter-widgets/base",
630
+ "_view_module_version": "1.2.0",
631
+ "_view_name": "StyleView",
632
+ "bar_color": null,
633
+ "description_width": ""
634
+ }
635
+ },
636
+ "feaf88667a9a42ab98ef7d858ee3a5a7": {
637
+ "model_module": "@jupyter-widgets/base",
638
+ "model_name": "LayoutModel",
639
+ "model_module_version": "1.2.0",
640
+ "state": {
641
+ "_model_module": "@jupyter-widgets/base",
642
+ "_model_module_version": "1.2.0",
643
+ "_model_name": "LayoutModel",
644
+ "_view_count": null,
645
+ "_view_module": "@jupyter-widgets/base",
646
+ "_view_module_version": "1.2.0",
647
+ "_view_name": "LayoutView",
648
+ "align_content": null,
649
+ "align_items": null,
650
+ "align_self": null,
651
+ "border": null,
652
+ "bottom": null,
653
+ "display": null,
654
+ "flex": null,
655
+ "flex_flow": null,
656
+ "grid_area": null,
657
+ "grid_auto_columns": null,
658
+ "grid_auto_flow": null,
659
+ "grid_auto_rows": null,
660
+ "grid_column": null,
661
+ "grid_gap": null,
662
+ "grid_row": null,
663
+ "grid_template_areas": null,
664
+ "grid_template_columns": null,
665
+ "grid_template_rows": null,
666
+ "height": null,
667
+ "justify_content": null,
668
+ "justify_items": null,
669
+ "left": null,
670
+ "margin": null,
671
+ "max_height": null,
672
+ "max_width": null,
673
+ "min_height": null,
674
+ "min_width": null,
675
+ "object_fit": null,
676
+ "object_position": null,
677
+ "order": null,
678
+ "overflow": null,
679
+ "overflow_x": null,
680
+ "overflow_y": null,
681
+ "padding": null,
682
+ "right": null,
683
+ "top": null,
684
+ "visibility": null,
685
+ "width": null
686
+ }
687
+ },
688
+ "f4eb3f443a5f4194a0cf92b8e6d1a2aa": {
689
+ "model_module": "@jupyter-widgets/controls",
690
+ "model_name": "DescriptionStyleModel",
691
+ "model_module_version": "1.5.0",
692
+ "state": {
693
+ "_model_module": "@jupyter-widgets/controls",
694
+ "_model_module_version": "1.5.0",
695
+ "_model_name": "DescriptionStyleModel",
696
+ "_view_count": null,
697
+ "_view_module": "@jupyter-widgets/base",
698
+ "_view_module_version": "1.2.0",
699
+ "_view_name": "StyleView",
700
+ "description_width": ""
701
+ }
702
+ },
703
+ "3cae3c43b5a947fbaf62491a8750c27d": {
704
+ "model_module": "@jupyter-widgets/controls",
705
+ "model_name": "HBoxModel",
706
+ "model_module_version": "1.5.0",
707
+ "state": {
708
+ "_dom_classes": [],
709
+ "_model_module": "@jupyter-widgets/controls",
710
+ "_model_module_version": "1.5.0",
711
+ "_model_name": "HBoxModel",
712
+ "_view_count": null,
713
+ "_view_module": "@jupyter-widgets/controls",
714
+ "_view_module_version": "1.5.0",
715
+ "_view_name": "HBoxView",
716
+ "box_style": "",
717
+ "children": [
718
+ "IPY_MODEL_57ba4b93f3f145da8ada6fd6f400a55b",
719
+ "IPY_MODEL_3912c175d4834d4e8e4daa29ec181ea6",
720
+ "IPY_MODEL_2d7cb2dd6d744b159ffea8bbc321b172"
721
+ ],
722
+ "layout": "IPY_MODEL_399339a2b46248c083d255c7dfe6a53c"
723
+ }
724
+ },
725
+ "57ba4b93f3f145da8ada6fd6f400a55b": {
726
+ "model_module": "@jupyter-widgets/controls",
727
+ "model_name": "HTMLModel",
728
+ "model_module_version": "1.5.0",
729
+ "state": {
730
+ "_dom_classes": [],
731
+ "_model_module": "@jupyter-widgets/controls",
732
+ "_model_module_version": "1.5.0",
733
+ "_model_name": "HTMLModel",
734
+ "_view_count": null,
735
+ "_view_module": "@jupyter-widgets/controls",
736
+ "_view_module_version": "1.5.0",
737
+ "_view_name": "HTMLView",
738
+ "description": "",
739
+ "description_tooltip": null,
740
+ "layout": "IPY_MODEL_1edc550d76014b80bbb08375425e5529",
741
+ "placeholder": "​",
742
+ "style": "IPY_MODEL_6120c3b37aec428cb8e46a9e83e8f970",
743
+ "value": "model-00003-of-00004.safetensors: 100%"
744
+ }
745
+ },
746
+ "3912c175d4834d4e8e4daa29ec181ea6": {
747
+ "model_module": "@jupyter-widgets/controls",
748
+ "model_name": "FloatProgressModel",
749
+ "model_module_version": "1.5.0",
750
+ "state": {
751
+ "_dom_classes": [],
752
+ "_model_module": "@jupyter-widgets/controls",
753
+ "_model_module_version": "1.5.0",
754
+ "_model_name": "FloatProgressModel",
755
+ "_view_count": null,
756
+ "_view_module": "@jupyter-widgets/controls",
757
+ "_view_module_version": "1.5.0",
758
+ "_view_name": "ProgressView",
759
+ "bar_style": "success",
760
+ "description": "",
761
+ "description_tooltip": null,
762
+ "layout": "IPY_MODEL_b649fc3271da4d20b2f8a91277eff699",
763
+ "max": 4915916176,
764
+ "min": 0,
765
+ "orientation": "horizontal",
766
+ "style": "IPY_MODEL_0ca38babe0c9446fbf42c8e69398b020",
767
+ "value": 4915916176
768
+ }
769
+ },
770
+ "2d7cb2dd6d744b159ffea8bbc321b172": {
771
+ "model_module": "@jupyter-widgets/controls",
772
+ "model_name": "HTMLModel",
773
+ "model_module_version": "1.5.0",
774
+ "state": {
775
+ "_dom_classes": [],
776
+ "_model_module": "@jupyter-widgets/controls",
777
+ "_model_module_version": "1.5.0",
778
+ "_model_name": "HTMLModel",
779
+ "_view_count": null,
780
+ "_view_module": "@jupyter-widgets/controls",
781
+ "_view_module_version": "1.5.0",
782
+ "_view_name": "HTMLView",
783
+ "description": "",
784
+ "description_tooltip": null,
785
+ "layout": "IPY_MODEL_5d3045cfb7f54a1e8930becfd8dd2234",
786
+ "placeholder": "​",
787
+ "style": "IPY_MODEL_df17a9b190b74c36be3ee8ca781712c5",
788
+ "value": " 4.92G/4.92G [06:24<00:00, 32.8MB/s]"
789
+ }
790
+ },
791
+ "399339a2b46248c083d255c7dfe6a53c": {
792
+ "model_module": "@jupyter-widgets/base",
793
+ "model_name": "LayoutModel",
794
+ "model_module_version": "1.2.0",
795
+ "state": {
796
+ "_model_module": "@jupyter-widgets/base",
797
+ "_model_module_version": "1.2.0",
798
+ "_model_name": "LayoutModel",
799
+ "_view_count": null,
800
+ "_view_module": "@jupyter-widgets/base",
801
+ "_view_module_version": "1.2.0",
802
+ "_view_name": "LayoutView",
803
+ "align_content": null,
804
+ "align_items": null,
805
+ "align_self": null,
806
+ "border": null,
807
+ "bottom": null,
808
+ "display": null,
809
+ "flex": null,
810
+ "flex_flow": null,
811
+ "grid_area": null,
812
+ "grid_auto_columns": null,
813
+ "grid_auto_flow": null,
814
+ "grid_auto_rows": null,
815
+ "grid_column": null,
816
+ "grid_gap": null,
817
+ "grid_row": null,
818
+ "grid_template_areas": null,
819
+ "grid_template_columns": null,
820
+ "grid_template_rows": null,
821
+ "height": null,
822
+ "justify_content": null,
823
+ "justify_items": null,
824
+ "left": null,
825
+ "margin": null,
826
+ "max_height": null,
827
+ "max_width": null,
828
+ "min_height": null,
829
+ "min_width": null,
830
+ "object_fit": null,
831
+ "object_position": null,
832
+ "order": null,
833
+ "overflow": null,
834
+ "overflow_x": null,
835
+ "overflow_y": null,
836
+ "padding": null,
837
+ "right": null,
838
+ "top": null,
839
+ "visibility": null,
840
+ "width": null
841
+ }
842
+ },
843
+ "1edc550d76014b80bbb08375425e5529": {
844
+ "model_module": "@jupyter-widgets/base",
845
+ "model_name": "LayoutModel",
846
+ "model_module_version": "1.2.0",
847
+ "state": {
848
+ "_model_module": "@jupyter-widgets/base",
849
+ "_model_module_version": "1.2.0",
850
+ "_model_name": "LayoutModel",
851
+ "_view_count": null,
852
+ "_view_module": "@jupyter-widgets/base",
853
+ "_view_module_version": "1.2.0",
854
+ "_view_name": "LayoutView",
855
+ "align_content": null,
856
+ "align_items": null,
857
+ "align_self": null,
858
+ "border": null,
859
+ "bottom": null,
860
+ "display": null,
861
+ "flex": null,
862
+ "flex_flow": null,
863
+ "grid_area": null,
864
+ "grid_auto_columns": null,
865
+ "grid_auto_flow": null,
866
+ "grid_auto_rows": null,
867
+ "grid_column": null,
868
+ "grid_gap": null,
869
+ "grid_row": null,
870
+ "grid_template_areas": null,
871
+ "grid_template_columns": null,
872
+ "grid_template_rows": null,
873
+ "height": null,
874
+ "justify_content": null,
875
+ "justify_items": null,
876
+ "left": null,
877
+ "margin": null,
878
+ "max_height": null,
879
+ "max_width": null,
880
+ "min_height": null,
881
+ "min_width": null,
882
+ "object_fit": null,
883
+ "object_position": null,
884
+ "order": null,
885
+ "overflow": null,
886
+ "overflow_x": null,
887
+ "overflow_y": null,
888
+ "padding": null,
889
+ "right": null,
890
+ "top": null,
891
+ "visibility": null,
892
+ "width": null
893
+ }
894
+ },
895
+ "6120c3b37aec428cb8e46a9e83e8f970": {
896
+ "model_module": "@jupyter-widgets/controls",
897
+ "model_name": "DescriptionStyleModel",
898
+ "model_module_version": "1.5.0",
899
+ "state": {
900
+ "_model_module": "@jupyter-widgets/controls",
901
+ "_model_module_version": "1.5.0",
902
+ "_model_name": "DescriptionStyleModel",
903
+ "_view_count": null,
904
+ "_view_module": "@jupyter-widgets/base",
905
+ "_view_module_version": "1.2.0",
906
+ "_view_name": "StyleView",
907
+ "description_width": ""
908
+ }
909
+ },
910
+ "b649fc3271da4d20b2f8a91277eff699": {
911
+ "model_module": "@jupyter-widgets/base",
912
+ "model_name": "LayoutModel",
913
+ "model_module_version": "1.2.0",
914
+ "state": {
915
+ "_model_module": "@jupyter-widgets/base",
916
+ "_model_module_version": "1.2.0",
917
+ "_model_name": "LayoutModel",
918
+ "_view_count": null,
919
+ "_view_module": "@jupyter-widgets/base",
920
+ "_view_module_version": "1.2.0",
921
+ "_view_name": "LayoutView",
922
+ "align_content": null,
923
+ "align_items": null,
924
+ "align_self": null,
925
+ "border": null,
926
+ "bottom": null,
927
+ "display": null,
928
+ "flex": null,
929
+ "flex_flow": null,
930
+ "grid_area": null,
931
+ "grid_auto_columns": null,
932
+ "grid_auto_flow": null,
933
+ "grid_auto_rows": null,
934
+ "grid_column": null,
935
+ "grid_gap": null,
936
+ "grid_row": null,
937
+ "grid_template_areas": null,
938
+ "grid_template_columns": null,
939
+ "grid_template_rows": null,
940
+ "height": null,
941
+ "justify_content": null,
942
+ "justify_items": null,
943
+ "left": null,
944
+ "margin": null,
945
+ "max_height": null,
946
+ "max_width": null,
947
+ "min_height": null,
948
+ "min_width": null,
949
+ "object_fit": null,
950
+ "object_position": null,
951
+ "order": null,
952
+ "overflow": null,
953
+ "overflow_x": null,
954
+ "overflow_y": null,
955
+ "padding": null,
956
+ "right": null,
957
+ "top": null,
958
+ "visibility": null,
959
+ "width": null
960
+ }
961
+ },
962
+ "0ca38babe0c9446fbf42c8e69398b020": {
963
+ "model_module": "@jupyter-widgets/controls",
964
+ "model_name": "ProgressStyleModel",
965
+ "model_module_version": "1.5.0",
966
+ "state": {
967
+ "_model_module": "@jupyter-widgets/controls",
968
+ "_model_module_version": "1.5.0",
969
+ "_model_name": "ProgressStyleModel",
970
+ "_view_count": null,
971
+ "_view_module": "@jupyter-widgets/base",
972
+ "_view_module_version": "1.2.0",
973
+ "_view_name": "StyleView",
974
+ "bar_color": null,
975
+ "description_width": ""
976
+ }
977
+ },
978
+ "5d3045cfb7f54a1e8930becfd8dd2234": {
979
+ "model_module": "@jupyter-widgets/base",
980
+ "model_name": "LayoutModel",
981
+ "model_module_version": "1.2.0",
982
+ "state": {
983
+ "_model_module": "@jupyter-widgets/base",
984
+ "_model_module_version": "1.2.0",
985
+ "_model_name": "LayoutModel",
986
+ "_view_count": null,
987
+ "_view_module": "@jupyter-widgets/base",
988
+ "_view_module_version": "1.2.0",
989
+ "_view_name": "LayoutView",
990
+ "align_content": null,
991
+ "align_items": null,
992
+ "align_self": null,
993
+ "border": null,
994
+ "bottom": null,
995
+ "display": null,
996
+ "flex": null,
997
+ "flex_flow": null,
998
+ "grid_area": null,
999
+ "grid_auto_columns": null,
1000
+ "grid_auto_flow": null,
1001
+ "grid_auto_rows": null,
1002
+ "grid_column": null,
1003
+ "grid_gap": null,
1004
+ "grid_row": null,
1005
+ "grid_template_areas": null,
1006
+ "grid_template_columns": null,
1007
+ "grid_template_rows": null,
1008
+ "height": null,
1009
+ "justify_content": null,
1010
+ "justify_items": null,
1011
+ "left": null,
1012
+ "margin": null,
1013
+ "max_height": null,
1014
+ "max_width": null,
1015
+ "min_height": null,
1016
+ "min_width": null,
1017
+ "object_fit": null,
1018
+ "object_position": null,
1019
+ "order": null,
1020
+ "overflow": null,
1021
+ "overflow_x": null,
1022
+ "overflow_y": null,
1023
+ "padding": null,
1024
+ "right": null,
1025
+ "top": null,
1026
+ "visibility": null,
1027
+ "width": null
1028
+ }
1029
+ },
1030
+ "df17a9b190b74c36be3ee8ca781712c5": {
1031
+ "model_module": "@jupyter-widgets/controls",
1032
+ "model_name": "DescriptionStyleModel",
1033
+ "model_module_version": "1.5.0",
1034
+ "state": {
1035
+ "_model_module": "@jupyter-widgets/controls",
1036
+ "_model_module_version": "1.5.0",
1037
+ "_model_name": "DescriptionStyleModel",
1038
+ "_view_count": null,
1039
+ "_view_module": "@jupyter-widgets/base",
1040
+ "_view_module_version": "1.2.0",
1041
+ "_view_name": "StyleView",
1042
+ "description_width": ""
1043
+ }
1044
+ },
1045
+ "5d784bc8265c433196cea6b6e9dcc527": {
1046
+ "model_module": "@jupyter-widgets/controls",
1047
+ "model_name": "HBoxModel",
1048
+ "model_module_version": "1.5.0",
1049
+ "state": {
1050
+ "_dom_classes": [],
1051
+ "_model_module": "@jupyter-widgets/controls",
1052
+ "_model_module_version": "1.5.0",
1053
+ "_model_name": "HBoxModel",
1054
+ "_view_count": null,
1055
+ "_view_module": "@jupyter-widgets/controls",
1056
+ "_view_module_version": "1.5.0",
1057
+ "_view_name": "HBoxView",
1058
+ "box_style": "",
1059
+ "children": [
1060
+ "IPY_MODEL_e31256ccfdcb4f40886732c0dd0d1664",
1061
+ "IPY_MODEL_baca6985490742f29f2c8e6bbc8ad1aa",
1062
+ "IPY_MODEL_5567210d612e4feab98db5aa262891df"
1063
+ ],
1064
+ "layout": "IPY_MODEL_d893c245a1344fc7afae63a4b365f689"
1065
+ }
1066
+ },
1067
+ "e31256ccfdcb4f40886732c0dd0d1664": {
1068
+ "model_module": "@jupyter-widgets/controls",
1069
+ "model_name": "HTMLModel",
1070
+ "model_module_version": "1.5.0",
1071
+ "state": {
1072
+ "_dom_classes": [],
1073
+ "_model_module": "@jupyter-widgets/controls",
1074
+ "_model_module_version": "1.5.0",
1075
+ "_model_name": "HTMLModel",
1076
+ "_view_count": null,
1077
+ "_view_module": "@jupyter-widgets/controls",
1078
+ "_view_module_version": "1.5.0",
1079
+ "_view_name": "HTMLView",
1080
+ "description": "",
1081
+ "description_tooltip": null,
1082
+ "layout": "IPY_MODEL_b9fa8463d1894832b39d27bbcfb72249",
1083
+ "placeholder": "​",
1084
+ "style": "IPY_MODEL_d9dd6d0d77154ed59a7179adca0ec04d",
1085
+ "value": "model-00004-of-00004.safetensors: 100%"
1086
+ }
1087
+ },
1088
+ "baca6985490742f29f2c8e6bbc8ad1aa": {
1089
+ "model_module": "@jupyter-widgets/controls",
1090
+ "model_name": "FloatProgressModel",
1091
+ "model_module_version": "1.5.0",
1092
+ "state": {
1093
+ "_dom_classes": [],
1094
+ "_model_module": "@jupyter-widgets/controls",
1095
+ "_model_module_version": "1.5.0",
1096
+ "_model_name": "FloatProgressModel",
1097
+ "_view_count": null,
1098
+ "_view_module": "@jupyter-widgets/controls",
1099
+ "_view_module_version": "1.5.0",
1100
+ "_view_name": "ProgressView",
1101
+ "bar_style": "success",
1102
+ "description": "",
1103
+ "description_tooltip": null,
1104
+ "layout": "IPY_MODEL_dd213f00f50241b28afe6e610e6ea3ed",
1105
+ "max": 1168138808,
1106
+ "min": 0,
1107
+ "orientation": "horizontal",
1108
+ "style": "IPY_MODEL_32499c119f5e42a58b19d8f764bff65c",
1109
+ "value": 1168138808
1110
+ }
1111
+ },
1112
+ "5567210d612e4feab98db5aa262891df": {
1113
+ "model_module": "@jupyter-widgets/controls",
1114
+ "model_name": "HTMLModel",
1115
+ "model_module_version": "1.5.0",
1116
+ "state": {
1117
+ "_dom_classes": [],
1118
+ "_model_module": "@jupyter-widgets/controls",
1119
+ "_model_module_version": "1.5.0",
1120
+ "_model_name": "HTMLModel",
1121
+ "_view_count": null,
1122
+ "_view_module": "@jupyter-widgets/controls",
1123
+ "_view_module_version": "1.5.0",
1124
+ "_view_name": "HTMLView",
1125
+ "description": "",
1126
+ "description_tooltip": null,
1127
+ "layout": "IPY_MODEL_21877ae02d8b4c1eaf78a3115a4957bd",
1128
+ "placeholder": "​",
1129
+ "style": "IPY_MODEL_c534c9d6492f472fa2bddc4dd35a4ad4",
1130
+ "value": " 1.17G/1.17G [01:11<00:00, 15.9MB/s]"
1131
+ }
1132
+ },
1133
+ "d893c245a1344fc7afae63a4b365f689": {
1134
+ "model_module": "@jupyter-widgets/base",
1135
+ "model_name": "LayoutModel",
1136
+ "model_module_version": "1.2.0",
1137
+ "state": {
1138
+ "_model_module": "@jupyter-widgets/base",
1139
+ "_model_module_version": "1.2.0",
1140
+ "_model_name": "LayoutModel",
1141
+ "_view_count": null,
1142
+ "_view_module": "@jupyter-widgets/base",
1143
+ "_view_module_version": "1.2.0",
1144
+ "_view_name": "LayoutView",
1145
+ "align_content": null,
1146
+ "align_items": null,
1147
+ "align_self": null,
1148
+ "border": null,
1149
+ "bottom": null,
1150
+ "display": null,
1151
+ "flex": null,
1152
+ "flex_flow": null,
1153
+ "grid_area": null,
1154
+ "grid_auto_columns": null,
1155
+ "grid_auto_flow": null,
1156
+ "grid_auto_rows": null,
1157
+ "grid_column": null,
1158
+ "grid_gap": null,
1159
+ "grid_row": null,
1160
+ "grid_template_areas": null,
1161
+ "grid_template_columns": null,
1162
+ "grid_template_rows": null,
1163
+ "height": null,
1164
+ "justify_content": null,
1165
+ "justify_items": null,
1166
+ "left": null,
1167
+ "margin": null,
1168
+ "max_height": null,
1169
+ "max_width": null,
1170
+ "min_height": null,
1171
+ "min_width": null,
1172
+ "object_fit": null,
1173
+ "object_position": null,
1174
+ "order": null,
1175
+ "overflow": null,
1176
+ "overflow_x": null,
1177
+ "overflow_y": null,
1178
+ "padding": null,
1179
+ "right": null,
1180
+ "top": null,
1181
+ "visibility": null,
1182
+ "width": null
1183
+ }
1184
+ },
1185
+ "b9fa8463d1894832b39d27bbcfb72249": {
1186
+ "model_module": "@jupyter-widgets/base",
1187
+ "model_name": "LayoutModel",
1188
+ "model_module_version": "1.2.0",
1189
+ "state": {
1190
+ "_model_module": "@jupyter-widgets/base",
1191
+ "_model_module_version": "1.2.0",
1192
+ "_model_name": "LayoutModel",
1193
+ "_view_count": null,
1194
+ "_view_module": "@jupyter-widgets/base",
1195
+ "_view_module_version": "1.2.0",
1196
+ "_view_name": "LayoutView",
1197
+ "align_content": null,
1198
+ "align_items": null,
1199
+ "align_self": null,
1200
+ "border": null,
1201
+ "bottom": null,
1202
+ "display": null,
1203
+ "flex": null,
1204
+ "flex_flow": null,
1205
+ "grid_area": null,
1206
+ "grid_auto_columns": null,
1207
+ "grid_auto_flow": null,
1208
+ "grid_auto_rows": null,
1209
+ "grid_column": null,
1210
+ "grid_gap": null,
1211
+ "grid_row": null,
1212
+ "grid_template_areas": null,
1213
+ "grid_template_columns": null,
1214
+ "grid_template_rows": null,
1215
+ "height": null,
1216
+ "justify_content": null,
1217
+ "justify_items": null,
1218
+ "left": null,
1219
+ "margin": null,
1220
+ "max_height": null,
1221
+ "max_width": null,
1222
+ "min_height": null,
1223
+ "min_width": null,
1224
+ "object_fit": null,
1225
+ "object_position": null,
1226
+ "order": null,
1227
+ "overflow": null,
1228
+ "overflow_x": null,
1229
+ "overflow_y": null,
1230
+ "padding": null,
1231
+ "right": null,
1232
+ "top": null,
1233
+ "visibility": null,
1234
+ "width": null
1235
+ }
1236
+ },
1237
+ "d9dd6d0d77154ed59a7179adca0ec04d": {
1238
+ "model_module": "@jupyter-widgets/controls",
1239
+ "model_name": "DescriptionStyleModel",
1240
+ "model_module_version": "1.5.0",
1241
+ "state": {
1242
+ "_model_module": "@jupyter-widgets/controls",
1243
+ "_model_module_version": "1.5.0",
1244
+ "_model_name": "DescriptionStyleModel",
1245
+ "_view_count": null,
1246
+ "_view_module": "@jupyter-widgets/base",
1247
+ "_view_module_version": "1.2.0",
1248
+ "_view_name": "StyleView",
1249
+ "description_width": ""
1250
+ }
1251
+ },
1252
+ "dd213f00f50241b28afe6e610e6ea3ed": {
1253
+ "model_module": "@jupyter-widgets/base",
1254
+ "model_name": "LayoutModel",
1255
+ "model_module_version": "1.2.0",
1256
+ "state": {
1257
+ "_model_module": "@jupyter-widgets/base",
1258
+ "_model_module_version": "1.2.0",
1259
+ "_model_name": "LayoutModel",
1260
+ "_view_count": null,
1261
+ "_view_module": "@jupyter-widgets/base",
1262
+ "_view_module_version": "1.2.0",
1263
+ "_view_name": "LayoutView",
1264
+ "align_content": null,
1265
+ "align_items": null,
1266
+ "align_self": null,
1267
+ "border": null,
1268
+ "bottom": null,
1269
+ "display": null,
1270
+ "flex": null,
1271
+ "flex_flow": null,
1272
+ "grid_area": null,
1273
+ "grid_auto_columns": null,
1274
+ "grid_auto_flow": null,
1275
+ "grid_auto_rows": null,
1276
+ "grid_column": null,
1277
+ "grid_gap": null,
1278
+ "grid_row": null,
1279
+ "grid_template_areas": null,
1280
+ "grid_template_columns": null,
1281
+ "grid_template_rows": null,
1282
+ "height": null,
1283
+ "justify_content": null,
1284
+ "justify_items": null,
1285
+ "left": null,
1286
+ "margin": null,
1287
+ "max_height": null,
1288
+ "max_width": null,
1289
+ "min_height": null,
1290
+ "min_width": null,
1291
+ "object_fit": null,
1292
+ "object_position": null,
1293
+ "order": null,
1294
+ "overflow": null,
1295
+ "overflow_x": null,
1296
+ "overflow_y": null,
1297
+ "padding": null,
1298
+ "right": null,
1299
+ "top": null,
1300
+ "visibility": null,
1301
+ "width": null
1302
+ }
1303
+ },
1304
+ "32499c119f5e42a58b19d8f764bff65c": {
1305
+ "model_module": "@jupyter-widgets/controls",
1306
+ "model_name": "ProgressStyleModel",
1307
+ "model_module_version": "1.5.0",
1308
+ "state": {
1309
+ "_model_module": "@jupyter-widgets/controls",
1310
+ "_model_module_version": "1.5.0",
1311
+ "_model_name": "ProgressStyleModel",
1312
+ "_view_count": null,
1313
+ "_view_module": "@jupyter-widgets/base",
1314
+ "_view_module_version": "1.2.0",
1315
+ "_view_name": "StyleView",
1316
+ "bar_color": null,
1317
+ "description_width": ""
1318
+ }
1319
+ },
1320
+ "21877ae02d8b4c1eaf78a3115a4957bd": {
1321
+ "model_module": "@jupyter-widgets/base",
1322
+ "model_name": "LayoutModel",
1323
+ "model_module_version": "1.2.0",
1324
+ "state": {
1325
+ "_model_module": "@jupyter-widgets/base",
1326
+ "_model_module_version": "1.2.0",
1327
+ "_model_name": "LayoutModel",
1328
+ "_view_count": null,
1329
+ "_view_module": "@jupyter-widgets/base",
1330
+ "_view_module_version": "1.2.0",
1331
+ "_view_name": "LayoutView",
1332
+ "align_content": null,
1333
+ "align_items": null,
1334
+ "align_self": null,
1335
+ "border": null,
1336
+ "bottom": null,
1337
+ "display": null,
1338
+ "flex": null,
1339
+ "flex_flow": null,
1340
+ "grid_area": null,
1341
+ "grid_auto_columns": null,
1342
+ "grid_auto_flow": null,
1343
+ "grid_auto_rows": null,
1344
+ "grid_column": null,
1345
+ "grid_gap": null,
1346
+ "grid_row": null,
1347
+ "grid_template_areas": null,
1348
+ "grid_template_columns": null,
1349
+ "grid_template_rows": null,
1350
+ "height": null,
1351
+ "justify_content": null,
1352
+ "justify_items": null,
1353
+ "left": null,
1354
+ "margin": null,
1355
+ "max_height": null,
1356
+ "max_width": null,
1357
+ "min_height": null,
1358
+ "min_width": null,
1359
+ "object_fit": null,
1360
+ "object_position": null,
1361
+ "order": null,
1362
+ "overflow": null,
1363
+ "overflow_x": null,
1364
+ "overflow_y": null,
1365
+ "padding": null,
1366
+ "right": null,
1367
+ "top": null,
1368
+ "visibility": null,
1369
+ "width": null
1370
+ }
1371
+ },
1372
+ "c534c9d6492f472fa2bddc4dd35a4ad4": {
1373
+ "model_module": "@jupyter-widgets/controls",
1374
+ "model_name": "DescriptionStyleModel",
1375
+ "model_module_version": "1.5.0",
1376
+ "state": {
1377
+ "_model_module": "@jupyter-widgets/controls",
1378
+ "_model_module_version": "1.5.0",
1379
+ "_model_name": "DescriptionStyleModel",
1380
+ "_view_count": null,
1381
+ "_view_module": "@jupyter-widgets/base",
1382
+ "_view_module_version": "1.2.0",
1383
+ "_view_name": "StyleView",
1384
+ "description_width": ""
1385
+ }
1386
+ }
1387
+ }
1388
+ }
1389
+ },
1390
+ "cells": [
1391
+ {
1392
+ "cell_type": "code",
1393
+ "execution_count": 4,
1394
+ "metadata": {
1395
+ "colab": {
1396
+ "base_uri": "https://localhost:8080/"
1397
+ },
1398
+ "id": "oXhNRG2fq5Nf",
1399
+ "outputId": "84fcca58-5f6e-490b-cef4-1c2a3e4b84f4"
1400
+ },
1401
+ "outputs": [
1402
+ {
1403
+ "output_type": "stream",
1404
+ "name": "stdout",
1405
+ "text": [
1406
+ "Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.12/dist-packages (0.48.2)\n",
1407
+ "Requirement already satisfied: accelerate in /usr/local/lib/python3.12/dist-packages (1.12.0)\n",
1408
+ "Requirement already satisfied: xformers==0.0.33.post1 in /usr/local/lib/python3.12/dist-packages (0.0.33.post1)\n",
1409
+ "Requirement already satisfied: peft in /usr/local/lib/python3.12/dist-packages (0.18.0)\n",
1410
+ "Requirement already satisfied: trl in /usr/local/lib/python3.12/dist-packages (0.25.1)\n",
1411
+ "Requirement already satisfied: triton in /usr/local/lib/python3.12/dist-packages (3.5.0)\n",
1412
+ "Requirement already satisfied: cut_cross_entropy in /usr/local/lib/python3.12/dist-packages (25.1.1)\n",
1413
+ "Requirement already satisfied: unsloth_zoo in /usr/local/lib/python3.12/dist-packages (2025.11.5)\n",
1414
+ "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.12/dist-packages (0.2.1)\n",
1415
+ "Requirement already satisfied: protobuf in /usr/local/lib/python3.12/dist-packages (5.29.5)\n",
1416
+ "Requirement already satisfied: datasets==4.3.0 in /usr/local/lib/python3.12/dist-packages (4.3.0)\n",
1417
+ "Requirement already satisfied: huggingface_hub>=0.34.0 in /usr/local/lib/python3.12/dist-packages (0.36.0)\n",
1418
+ "Requirement already satisfied: hf_transfer in /usr/local/lib/python3.12/dist-packages (0.1.9)\n",
1419
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (3.20.0)\n",
1420
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (2.0.2)\n",
1421
+ "Requirement already satisfied: pyarrow>=21.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (22.0.0)\n",
1422
+ "Requirement already satisfied: dill<0.4.1,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (0.3.8)\n",
1423
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (2.2.2)\n",
1424
+ "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (2.32.4)\n",
1425
+ "Requirement already satisfied: httpx<1.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (0.28.1)\n",
1426
+ "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (4.67.1)\n",
1427
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (3.6.0)\n",
1428
+ "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (0.70.16)\n",
1429
+ "Requirement already satisfied: fsspec<=2025.9.0,>=2023.1.0 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (2025.3.0)\n",
1430
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (25.0)\n",
1431
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from datasets==4.3.0) (6.0.3)\n",
1432
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub>=0.34.0) (4.15.0)\n",
1433
+ "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub>=0.34.0) (1.2.0)\n",
1434
+ "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (3.13.2)\n",
1435
+ "Requirement already satisfied: anyio in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (4.11.0)\n",
1436
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (2025.11.12)\n",
1437
+ "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (1.0.9)\n",
1438
+ "Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets==4.3.0) (3.11)\n",
1439
+ "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.12/dist-packages (from httpcore==1.*->httpx<1.0.0->datasets==4.3.0) (0.16.0)\n",
1440
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets==4.3.0) (3.4.4)\n",
1441
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets==4.3.0) (2.5.0)\n",
1442
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets==4.3.0) (2.9.0.post0)\n",
1443
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets==4.3.0) (2025.2)\n",
1444
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets==4.3.0) (2025.2)\n",
1445
+ "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (2.6.1)\n",
1446
+ "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (1.4.0)\n",
1447
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (25.4.0)\n",
1448
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (1.8.0)\n",
1449
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (6.7.0)\n",
1450
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (0.4.1)\n",
1451
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets==4.3.0) (1.22.0)\n",
1452
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets==4.3.0) (1.17.0)\n",
1453
+ "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.12/dist-packages (from anyio->httpx<1.0.0->datasets==4.3.0) (1.3.1)\n",
1454
+ "Requirement already satisfied: unsloth in /usr/local/lib/python3.12/dist-packages (2025.11.4)\n"
1455
+ ]
1456
+ }
1457
+ ],
1458
+ "source": [
1459
+ "import os, re\n",
1460
+ "\n",
1461
+ "import torch; v = re.match(r\"[0-9]{1,}\\.[0-9]{1,}\", str(torch.__version__)).group(0)\n",
1462
+ "xformers = \"xformers==\" + (\"0.0.33.post1\" if v==\"2.9\" else \"0.0.32.post2\" if v==\"2.8\" else \"0.0.29.post3\")\n",
1463
+ "!pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo\n",
1464
+ "!pip install sentencepiece protobuf \"datasets==4.3.0\" \"huggingface_hub>=0.34.0\" hf_transfer\n",
1465
+ "!pip install --no-deps unsloth"
1466
+ ]
1467
+ },
1468
+ {
1469
+ "cell_type": "code",
1470
+ "source": [
1471
+ "import unsloth\n",
1472
+ "from unsloth import FastLanguageModel\n",
1473
+ "import torch\n",
1474
+ "\n",
1475
+ "max_seq_length = 2048\n",
1476
+ "dtype = None\n",
1477
+ "load_in_4bit = True\n",
1478
+ "model_name = \"unsloth/Meta-Llama-3.1-8B-bnb-4bit\"\n",
1479
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
1480
+ " model_name = model_name,\n",
1481
+ " max_seq_length = max_seq_length,\n",
1482
+ " dtype = dtype,\n",
1483
+ " load_in_4bit = load_in_4bit,\n",
1484
+ ")"
1485
+ ],
1486
+ "metadata": {
1487
+ "colab": {
1488
+ "base_uri": "https://localhost:8080/"
1489
+ },
1490
+ "id": "rHVeLF3Jtlfd",
1491
+ "outputId": "c1eafe1e-ac46-4711-8453-a5b05c6e5b0d"
1492
+ },
1493
+ "execution_count": 5,
1494
+ "outputs": [
1495
+ {
1496
+ "output_type": "stream",
1497
+ "name": "stdout",
1498
+ "text": [
1499
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
1500
+ "🦥 Unsloth Zoo will now patch everything to make training faster!\n",
1501
+ "==((====))== Unsloth 2025.11.4: Fast Llama patching. Transformers: 4.57.2.\n",
1502
+ " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n",
1503
+ "O^O/ \\_/ \\ Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0\n",
1504
+ "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]\n",
1505
+ " \"-____-\" Free license: http://github.com/unslothai/unsloth\n",
1506
+ "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
1507
+ ]
1508
+ }
1509
+ ]
1510
+ },
1511
+ {
1512
+ "cell_type": "code",
1513
+ "source": [
1514
+ "model = FastLanguageModel.get_peft_model(\n",
1515
+ " model,\n",
1516
+ " r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
1517
+ " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
1518
+ " \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
1519
+ " lora_alpha = 16,\n",
1520
+ " lora_dropout = 0, # Supports any, but = 0 is optimized\n",
1521
+ " bias = \"none\", # Supports any, but = \"none\" is optimized\n",
1522
+ " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n",
1523
+ " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n",
1524
+ " random_state = 61, # Trabzon Plakası\n",
1525
+ " use_rslora = False, # We support rank stabilized LoRA\n",
1526
+ " loftq_config = None, # And LoftQ\n",
1527
+ ")"
1528
+ ],
1529
+ "metadata": {
1530
+ "colab": {
1531
+ "base_uri": "https://localhost:8080/"
1532
+ },
1533
+ "id": "P3OKXPPNtuSg",
1534
+ "outputId": "708e7805-423b-4982-d224-a663f0065bd2"
1535
+ },
1536
+ "execution_count": 6,
1537
+ "outputs": [
1538
+ {
1539
+ "output_type": "stream",
1540
+ "name": "stderr",
1541
+ "text": [
1542
+ "Unsloth 2025.11.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
1543
+ ]
1544
+ }
1545
+ ]
1546
+ },
1547
+ {
1548
+ "cell_type": "code",
1549
+ "source": [
1550
+ "qa_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
1551
+ "\n",
1552
+ "### Instruction:\n",
1553
+ "Answer the user's question accurately based *only* on the provided context.\n",
1554
+ "\n",
1555
+ "### Context:\n",
1556
+ "{}\n",
1557
+ "\n",
1558
+ "### Question:\n",
1559
+ "{}\n",
1560
+ "\n",
1561
+ "### Response:\n",
1562
+ "{}\"\"\" # bunu daha guzel de yazabiliriz kafamdan salladim\n",
1563
+ "\n",
1564
+ "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
1565
+ "def formatting_prompts_func(examples):\n",
1566
+ " queries = examples[\"query\"]\n",
1567
+ " original_questions = examples[\"original_question\"]\n",
1568
+ " responses = examples[\"response\"]\n",
1569
+ " texts = []\n",
1570
+ " for query, original_question, response in zip(queries, original_questions, responses):\n",
1571
+ " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
1572
+ " text = qa_prompt.format(query, original_question, response) + EOS_TOKEN\n",
1573
+ " texts.append(text)\n",
1574
+ " return texts\n",
1575
+ "from datasets import load_dataset\n",
1576
+ "dataset = load_dataset(\"meta-math/MetaMathQA\", split = \"train\")"
1577
+ ],
1578
+ "metadata": {
1579
+ "id": "3wjkkNvM0Y71"
1580
+ },
1581
+ "execution_count": 7,
1582
+ "outputs": []
1583
+ },
1584
+ {
1585
+ "cell_type": "code",
1586
+ "source": [
1587
+ "shuffled_dataset = dataset.shuffle(seed=42)\n",
1588
+ "random_dataset = shuffled_dataset.select(range(50000))\n",
1589
+ "test_dataset = shuffled_dataset.select(range(50000,62500)) # %80 = 50k, %20 = 12.5k"
1590
+ ],
1591
+ "metadata": {
1592
+ "id": "S3o5ObKlxZdk"
1593
+ },
1594
+ "execution_count": 8,
1595
+ "outputs": []
1596
+ },
1597
+ {
1598
+ "cell_type": "code",
1599
+ "source": [
1600
+ "from trl import SFTConfig, SFTTrainer\n",
1601
+ "trainer = SFTTrainer(\n",
1602
+ " model = model,\n",
1603
+ " tokenizer = tokenizer,\n",
1604
+ " train_dataset = random_dataset,\n",
1605
+ " formatting_func = formatting_prompts_func,\n",
1606
+ " max_seq_length = max_seq_length,\n",
1607
+ " packing = False,\n",
1608
+ " args = SFTConfig(\n",
1609
+ " per_device_train_batch_size = 8,\n",
1610
+ " gradient_accumulation_steps = 4,\n",
1611
+ " warmup_steps = 5,\n",
1612
+ " num_train_epochs = 5,\n",
1613
+ " max_steps = 50,\n",
1614
+ " learning_rate = 5e-5,\n",
1615
+ " logging_steps = 1,\n",
1616
+ " optim = \"adamw_8bit\",\n",
1617
+ " weight_decay = 0.001,\n",
1618
+ " lr_scheduler_type = \"linear\",\n",
1619
+ " seed = 3407,\n",
1620
+ " output_dir = \"outputs\",\n",
1621
+ " report_to = \"none\",\n",
1622
+ " ),\n",
1623
+ ")"
1624
+ ],
1625
+ "metadata": {
1626
+ "id": "bVXWwFNCzg1Z"
1627
+ },
1628
+ "execution_count": 9,
1629
+ "outputs": []
1630
+ },
1631
+ {
1632
+ "cell_type": "code",
1633
+ "source": [
1634
+ "trainer_stats = trainer.train()"
1635
+ ],
1636
+ "metadata": {
1637
+ "id": "0LK85tTpzxos",
1638
+ "colab": {
1639
+ "base_uri": "https://localhost:8080/",
1640
+ "height": 1000
1641
+ },
1642
+ "outputId": "c6890db6-6342-41d5-a554-f738443866e8"
1643
+ },
1644
+ "execution_count": 10,
1645
+ "outputs": [
1646
+ {
1647
+ "output_type": "stream",
1648
+ "name": "stderr",
1649
+ "text": [
1650
+ "The model is already on multiple devices. Skipping the move to device specified in `args`.\n",
1651
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
1652
+ " \\\\ /| Num examples = 50,000 | Num Epochs = 1 | Total steps = 50\n",
1653
+ "O^O/ \\_/ \\ Batch size per device = 8 | Gradient accumulation steps = 4\n",
1654
+ "\\ / Data Parallel GPUs = 1 | Total batch size (8 x 4 x 1) = 32\n",
1655
+ " \"-____-\" Trainable parameters = 83,886,080 of 8,114,147,328 (1.03% trained)\n"
1656
+ ]
1657
+ },
1658
+ {
1659
+ "output_type": "stream",
1660
+ "name": "stdout",
1661
+ "text": [
1662
+ "Unsloth: Will smartly offload gradients to save VRAM!\n"
1663
+ ]
1664
+ },
1665
+ {
1666
+ "output_type": "display_data",
1667
+ "data": {
1668
+ "text/plain": [
1669
+ "<IPython.core.display.HTML object>"
1670
+ ],
1671
+ "text/html": [
1672
+ "\n",
1673
+ " <div>\n",
1674
+ " \n",
1675
+ " <progress value='50' max='50' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1676
+ " [50/50 37:05, Epoch 0/1]\n",
1677
+ " </div>\n",
1678
+ " <table border=\"1\" class=\"dataframe\">\n",
1679
+ " <thead>\n",
1680
+ " <tr style=\"text-align: left;\">\n",
1681
+ " <th>Step</th>\n",
1682
+ " <th>Training Loss</th>\n",
1683
+ " </tr>\n",
1684
+ " </thead>\n",
1685
+ " <tbody>\n",
1686
+ " <tr>\n",
1687
+ " <td>1</td>\n",
1688
+ " <td>1.230200</td>\n",
1689
+ " </tr>\n",
1690
+ " <tr>\n",
1691
+ " <td>2</td>\n",
1692
+ " <td>1.270300</td>\n",
1693
+ " </tr>\n",
1694
+ " <tr>\n",
1695
+ " <td>3</td>\n",
1696
+ " <td>1.299600</td>\n",
1697
+ " </tr>\n",
1698
+ " <tr>\n",
1699
+ " <td>4</td>\n",
1700
+ " <td>1.244500</td>\n",
1701
+ " </tr>\n",
1702
+ " <tr>\n",
1703
+ " <td>5</td>\n",
1704
+ " <td>1.184100</td>\n",
1705
+ " </tr>\n",
1706
+ " <tr>\n",
1707
+ " <td>6</td>\n",
1708
+ " <td>1.111100</td>\n",
1709
+ " </tr>\n",
1710
+ " <tr>\n",
1711
+ " <td>7</td>\n",
1712
+ " <td>1.223400</td>\n",
1713
+ " </tr>\n",
1714
+ " <tr>\n",
1715
+ " <td>8</td>\n",
1716
+ " <td>1.108700</td>\n",
1717
+ " </tr>\n",
1718
+ " <tr>\n",
1719
+ " <td>9</td>\n",
1720
+ " <td>1.065300</td>\n",
1721
+ " </tr>\n",
1722
+ " <tr>\n",
1723
+ " <td>10</td>\n",
1724
+ " <td>1.053800</td>\n",
1725
+ " </tr>\n",
1726
+ " <tr>\n",
1727
+ " <td>11</td>\n",
1728
+ " <td>0.964000</td>\n",
1729
+ " </tr>\n",
1730
+ " <tr>\n",
1731
+ " <td>12</td>\n",
1732
+ " <td>0.902800</td>\n",
1733
+ " </tr>\n",
1734
+ " <tr>\n",
1735
+ " <td>13</td>\n",
1736
+ " <td>0.931900</td>\n",
1737
+ " </tr>\n",
1738
+ " <tr>\n",
1739
+ " <td>14</td>\n",
1740
+ " <td>0.898900</td>\n",
1741
+ " </tr>\n",
1742
+ " <tr>\n",
1743
+ " <td>15</td>\n",
1744
+ " <td>0.874400</td>\n",
1745
+ " </tr>\n",
1746
+ " <tr>\n",
1747
+ " <td>16</td>\n",
1748
+ " <td>0.798700</td>\n",
1749
+ " </tr>\n",
1750
+ " <tr>\n",
1751
+ " <td>17</td>\n",
1752
+ " <td>0.817200</td>\n",
1753
+ " </tr>\n",
1754
+ " <tr>\n",
1755
+ " <td>18</td>\n",
1756
+ " <td>0.772500</td>\n",
1757
+ " </tr>\n",
1758
+ " <tr>\n",
1759
+ " <td>19</td>\n",
1760
+ " <td>0.732200</td>\n",
1761
+ " </tr>\n",
1762
+ " <tr>\n",
1763
+ " <td>20</td>\n",
1764
+ " <td>0.722500</td>\n",
1765
+ " </tr>\n",
1766
+ " <tr>\n",
1767
+ " <td>21</td>\n",
1768
+ " <td>0.668200</td>\n",
1769
+ " </tr>\n",
1770
+ " <tr>\n",
1771
+ " <td>22</td>\n",
1772
+ " <td>0.647300</td>\n",
1773
+ " </tr>\n",
1774
+ " <tr>\n",
1775
+ " <td>23</td>\n",
1776
+ " <td>0.637800</td>\n",
1777
+ " </tr>\n",
1778
+ " <tr>\n",
1779
+ " <td>24</td>\n",
1780
+ " <td>0.636000</td>\n",
1781
+ " </tr>\n",
1782
+ " <tr>\n",
1783
+ " <td>25</td>\n",
1784
+ " <td>0.659600</td>\n",
1785
+ " </tr>\n",
1786
+ " <tr>\n",
1787
+ " <td>26</td>\n",
1788
+ " <td>0.617000</td>\n",
1789
+ " </tr>\n",
1790
+ " <tr>\n",
1791
+ " <td>27</td>\n",
1792
+ " <td>0.650700</td>\n",
1793
+ " </tr>\n",
1794
+ " <tr>\n",
1795
+ " <td>28</td>\n",
1796
+ " <td>0.622400</td>\n",
1797
+ " </tr>\n",
1798
+ " <tr>\n",
1799
+ " <td>29</td>\n",
1800
+ " <td>0.644100</td>\n",
1801
+ " </tr>\n",
1802
+ " <tr>\n",
1803
+ " <td>30</td>\n",
1804
+ " <td>0.623500</td>\n",
1805
+ " </tr>\n",
1806
+ " <tr>\n",
1807
+ " <td>31</td>\n",
1808
+ " <td>0.566500</td>\n",
1809
+ " </tr>\n",
1810
+ " <tr>\n",
1811
+ " <td>32</td>\n",
1812
+ " <td>0.608200</td>\n",
1813
+ " </tr>\n",
1814
+ " <tr>\n",
1815
+ " <td>33</td>\n",
1816
+ " <td>0.593700</td>\n",
1817
+ " </tr>\n",
1818
+ " <tr>\n",
1819
+ " <td>34</td>\n",
1820
+ " <td>0.609600</td>\n",
1821
+ " </tr>\n",
1822
+ " <tr>\n",
1823
+ " <td>35</td>\n",
1824
+ " <td>0.579500</td>\n",
1825
+ " </tr>\n",
1826
+ " <tr>\n",
1827
+ " <td>36</td>\n",
1828
+ " <td>0.597300</td>\n",
1829
+ " </tr>\n",
1830
+ " <tr>\n",
1831
+ " <td>37</td>\n",
1832
+ " <td>0.562200</td>\n",
1833
+ " </tr>\n",
1834
+ " <tr>\n",
1835
+ " <td>38</td>\n",
1836
+ " <td>0.592100</td>\n",
1837
+ " </tr>\n",
1838
+ " <tr>\n",
1839
+ " <td>39</td>\n",
1840
+ " <td>0.562600</td>\n",
1841
+ " </tr>\n",
1842
+ " <tr>\n",
1843
+ " <td>40</td>\n",
1844
+ " <td>0.567200</td>\n",
1845
+ " </tr>\n",
1846
+ " <tr>\n",
1847
+ " <td>41</td>\n",
1848
+ " <td>0.538800</td>\n",
1849
+ " </tr>\n",
1850
+ " <tr>\n",
1851
+ " <td>42</td>\n",
1852
+ " <td>0.590200</td>\n",
1853
+ " </tr>\n",
1854
+ " <tr>\n",
1855
+ " <td>43</td>\n",
1856
+ " <td>0.561600</td>\n",
1857
+ " </tr>\n",
1858
+ " <tr>\n",
1859
+ " <td>44</td>\n",
1860
+ " <td>0.570400</td>\n",
1861
+ " </tr>\n",
1862
+ " <tr>\n",
1863
+ " <td>45</td>\n",
1864
+ " <td>0.529900</td>\n",
1865
+ " </tr>\n",
1866
+ " <tr>\n",
1867
+ " <td>46</td>\n",
1868
+ " <td>0.577500</td>\n",
1869
+ " </tr>\n",
1870
+ " <tr>\n",
1871
+ " <td>47</td>\n",
1872
+ " <td>0.591800</td>\n",
1873
+ " </tr>\n",
1874
+ " <tr>\n",
1875
+ " <td>48</td>\n",
1876
+ " <td>0.585600</td>\n",
1877
+ " </tr>\n",
1878
+ " <tr>\n",
1879
+ " <td>49</td>\n",
1880
+ " <td>0.564800</td>\n",
1881
+ " </tr>\n",
1882
+ " <tr>\n",
1883
+ " <td>50</td>\n",
1884
+ " <td>0.551400</td>\n",
1885
+ " </tr>\n",
1886
+ " </tbody>\n",
1887
+ "</table><p>"
1888
+ ]
1889
+ },
1890
+ "metadata": {}
1891
+ }
1892
+ ]
1893
+ },
1894
+ {
1895
+ "cell_type": "code",
1896
+ "source": [
1897
+ "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
1898
+ "inputs = tokenizer(\n",
1899
+ "[\n",
1900
+ " qa_prompt.format(\n",
1901
+ " \"What is 3+8?\",\n",
1902
+ " \"What is 3+8?\",\n",
1903
+ " \"\",\n",
1904
+ " )\n",
1905
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
1906
+ "\n",
1907
+ "outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)\n",
1908
+ "tokenizer.batch_decode(outputs)"
1909
+ ],
1910
+ "metadata": {
1911
+ "id": "a-xjHIy269KP",
1912
+ "colab": {
1913
+ "base_uri": "https://localhost:8080/"
1914
+ },
1915
+ "outputId": "122559de-ac3e-4e20-d0d3-8ce7e89fd395"
1916
+ },
1917
+ "execution_count": 11,
1918
+ "outputs": [
1919
+ {
1920
+ "output_type": "execute_result",
1921
+ "data": {
1922
+ "text/plain": [
1923
+ "[\"<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nAnswer the user's question accurately based *only* on the provided context.\\n\\n### Context:\\nWhat is 3+8?\\n\\n### Question:\\nWhat is 3+8?\\n\\n### Response:\\nThe answer is 11.\\nThe sum of 3 and 8 is 11.\\n#### 3+8=11\\nThe answer is: 11<|end_of_text|>\"]"
1924
+ ]
1925
+ },
1926
+ "metadata": {},
1927
+ "execution_count": 11
1928
+ }
1929
+ ]
1930
+ },
1931
+ {
1932
+ "cell_type": "code",
1933
+ "source": [
1934
+ "model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)"
1935
+ ],
1936
+ "metadata": {
1937
+ "colab": {
1938
+ "base_uri": "https://localhost:8080/",
1939
+ "height": 413,
1940
+ "referenced_widgets": [
1941
+ "038f9418a1b642dcae983797720afdee",
1942
+ "a77615fc48ed4d97bdd324a2a1ce5517",
1943
+ "edacb8c70ffe431ab02bf3f316674e74",
1944
+ "55cc0de6656f4e7bb92a61ccfb40cfac",
1945
+ "be22793949384f6c915eb431c772ea1c",
1946
+ "49e1253a92bb4dd59025cd07f1c4139c",
1947
+ "026d616290f24d1b8b7f90dcb3dfd5a3",
1948
+ "af284825caed4e85b7877adc9bb623d6",
1949
+ "9764d10debe24acb8642158d5878cad0",
1950
+ "ab38498225a742c598eaa37cf6788c05",
1951
+ "da4570460aae482abc163ec8275131fb",
1952
+ "95ea82397c9947ad9eaec08c89150061",
1953
+ "9ced7c8f0a7c421182b6c5c18e42008a",
1954
+ "2d920152d93c4c7c85cef5096ddcd188",
1955
+ "55d3f7b37a154a6d8e8325d0008af4f5",
1956
+ "44b13bcb54404a048daaa8b12113fe10",
1957
+ "9e35dc55dad34141811bf2cbae946702",
1958
+ "246ea1a7c4e94ee592914314668ff68b",
1959
+ "31e16c31d03e4ec68666ed806533d66c",
1960
+ "8a16684b5cea4655992df2f62513bd96",
1961
+ "feaf88667a9a42ab98ef7d858ee3a5a7",
1962
+ "f4eb3f443a5f4194a0cf92b8e6d1a2aa",
1963
+ "3cae3c43b5a947fbaf62491a8750c27d",
1964
+ "57ba4b93f3f145da8ada6fd6f400a55b",
1965
+ "3912c175d4834d4e8e4daa29ec181ea6",
1966
+ "2d7cb2dd6d744b159ffea8bbc321b172",
1967
+ "399339a2b46248c083d255c7dfe6a53c",
1968
+ "1edc550d76014b80bbb08375425e5529",
1969
+ "6120c3b37aec428cb8e46a9e83e8f970",
1970
+ "b649fc3271da4d20b2f8a91277eff699",
1971
+ "0ca38babe0c9446fbf42c8e69398b020",
1972
+ "5d3045cfb7f54a1e8930becfd8dd2234",
1973
+ "df17a9b190b74c36be3ee8ca781712c5",
1974
+ "5d784bc8265c433196cea6b6e9dcc527",
1975
+ "e31256ccfdcb4f40886732c0dd0d1664",
1976
+ "baca6985490742f29f2c8e6bbc8ad1aa",
1977
+ "5567210d612e4feab98db5aa262891df",
1978
+ "d893c245a1344fc7afae63a4b365f689",
1979
+ "b9fa8463d1894832b39d27bbcfb72249",
1980
+ "d9dd6d0d77154ed59a7179adca0ec04d",
1981
+ "dd213f00f50241b28afe6e610e6ea3ed",
1982
+ "32499c119f5e42a58b19d8f764bff65c",
1983
+ "21877ae02d8b4c1eaf78a3115a4957bd",
1984
+ "c534c9d6492f472fa2bddc4dd35a4ad4"
1985
+ ]
1986
+ },
1987
+ "id": "UPOtyS1c7q6f",
1988
+ "outputId": "8de042b2-d8c9-4723-bf51-ca391af8e482"
1989
+ },
1990
+ "execution_count": 12,
1991
+ "outputs": [
1992
+ {
1993
+ "output_type": "stream",
1994
+ "name": "stdout",
1995
+ "text": [
1996
+ "Found HuggingFace hub cache directory: /root/.cache/huggingface/hub\n",
1997
+ "Checking cache directory for required files...\n",
1998
+ "Cache check failed: model-00001-of-00004.safetensors not found in local cache.\n",
1999
+ "Not all required files found in cache. Will proceed with downloading.\n",
2000
+ "Checking cache directory for required files...\n",
2001
+ "Cache check failed: tokenizer.model not found in local cache.\n",
2002
+ "Not all required files found in cache. Will proceed with downloading.\n"
2003
+ ]
2004
+ },
2005
+ {
2006
+ "output_type": "stream",
2007
+ "name": "stderr",
2008
+ "text": [
2009
+ "\rUnsloth: Preparing safetensor model files: 0%| | 0/4 [00:00<?, ?it/s]"
2010
+ ]
2011
+ },
2012
+ {
2013
+ "output_type": "display_data",
2014
+ "data": {
2015
+ "text/plain": [
2016
+ "model-00001-of-00004.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
2017
+ ],
2018
+ "application/vnd.jupyter.widget-view+json": {
2019
+ "version_major": 2,
2020
+ "version_minor": 0,
2021
+ "model_id": "038f9418a1b642dcae983797720afdee"
2022
+ }
2023
+ },
2024
+ "metadata": {}
2025
+ },
2026
+ {
2027
+ "output_type": "stream",
2028
+ "name": "stderr",
2029
+ "text": [
2030
+ "\rUnsloth: Preparing safetensor model files: 25%|██▌ | 1/4 [04:29<13:27, 269.19s/it]"
2031
+ ]
2032
+ },
2033
+ {
2034
+ "output_type": "display_data",
2035
+ "data": {
2036
+ "text/plain": [
2037
+ "model-00002-of-00004.safetensors: 0%| | 0.00/5.00G [00:00<?, ?B/s]"
2038
+ ],
2039
+ "application/vnd.jupyter.widget-view+json": {
2040
+ "version_major": 2,
2041
+ "version_minor": 0,
2042
+ "model_id": "95ea82397c9947ad9eaec08c89150061"
2043
+ }
2044
+ },
2045
+ "metadata": {}
2046
+ },
2047
+ {
2048
+ "output_type": "stream",
2049
+ "name": "stderr",
2050
+ "text": [
2051
+ "\rUnsloth: Preparing safetensor model files: 50%|█████ | 2/4 [07:39<07:25, 222.71s/it]"
2052
+ ]
2053
+ },
2054
+ {
2055
+ "output_type": "display_data",
2056
+ "data": {
2057
+ "text/plain": [
2058
+ "model-00003-of-00004.safetensors: 0%| | 0.00/4.92G [00:00<?, ?B/s]"
2059
+ ],
2060
+ "application/vnd.jupyter.widget-view+json": {
2061
+ "version_major": 2,
2062
+ "version_minor": 0,
2063
+ "model_id": "3cae3c43b5a947fbaf62491a8750c27d"
2064
+ }
2065
+ },
2066
+ "metadata": {}
2067
+ },
2068
+ {
2069
+ "output_type": "stream",
2070
+ "name": "stderr",
2071
+ "text": [
2072
+ "\rUnsloth: Preparing safetensor model files: 75%|███████▌ | 3/4 [14:03<04:56, 296.64s/it]"
2073
+ ]
2074
+ },
2075
+ {
2076
+ "output_type": "display_data",
2077
+ "data": {
2078
+ "text/plain": [
2079
+ "model-00004-of-00004.safetensors: 0%| | 0.00/1.17G [00:00<?, ?B/s]"
2080
+ ],
2081
+ "application/vnd.jupyter.widget-view+json": {
2082
+ "version_major": 2,
2083
+ "version_minor": 0,
2084
+ "model_id": "5d784bc8265c433196cea6b6e9dcc527"
2085
+ }
2086
+ },
2087
+ "metadata": {}
2088
+ },
2089
+ {
2090
+ "output_type": "stream",
2091
+ "name": "stderr",
2092
+ "text": [
2093
+ "Unsloth: Preparing safetensor model files: 100%|██████████| 4/4 [15:15<00:00, 228.92s/it]\n"
2094
+ ]
2095
+ },
2096
+ {
2097
+ "output_type": "stream",
2098
+ "name": "stdout",
2099
+ "text": [
2100
+ "Note: tokenizer.model not found (this is OK for non-SentencePiece models)\n"
2101
+ ]
2102
+ },
2103
+ {
2104
+ "output_type": "stream",
2105
+ "name": "stderr",
2106
+ "text": [
2107
+ "Unsloth: Merging weights into 16bit: 100%|██████████| 4/4 [08:29<00:00, 127.42s/it]\n"
2108
+ ]
2109
+ },
2110
+ {
2111
+ "output_type": "stream",
2112
+ "name": "stdout",
2113
+ "text": [
2114
+ "Unsloth: Merge process complete. Saved to `/content/model`\n"
2115
+ ]
2116
+ }
2117
+ ]
2118
+ },
2119
+ {
2120
+ "cell_type": "code",
2121
+ "source": [
2122
+ "model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")"
2123
+ ],
2124
+ "metadata": {
2125
+ "colab": {
2126
+ "base_uri": "https://localhost:8080/"
2127
+ },
2128
+ "id": "FqWafrWG8ope",
2129
+ "outputId": "57d65131-fd04-43d7-91ef-5ba2833c95a0"
2130
+ },
2131
+ "execution_count": null,
2132
+ "outputs": [
2133
+ {
2134
+ "output_type": "stream",
2135
+ "name": "stdout",
2136
+ "text": [
2137
+ "Unsloth: Merging model weights to 16-bit format...\n",
2138
+ "Found HuggingFace hub cache directory: /root/.cache/huggingface/hub\n",
2139
+ "Checking cache directory for required files...\n",
2140
+ "Cache check failed: model-00001-of-00004.safetensors not found in local cache.\n",
2141
+ "Not all required files found in cache. Will proceed with downloading.\n",
2142
+ "Checking cache directory for required files...\n",
2143
+ "Cache check failed: tokenizer.model not found in local cache.\n",
2144
+ "Not all required files found in cache. Will proceed with downloading.\n"
2145
+ ]
2146
+ },
2147
+ {
2148
+ "output_type": "stream",
2149
+ "name": "stderr",
2150
+ "text": [
2151
+ "Unsloth: Preparing safetensor model files: 100%|██████████| 4/4 [00:00<00:00, 30120.68it/s]\n"
2152
+ ]
2153
+ },
2154
+ {
2155
+ "output_type": "stream",
2156
+ "name": "stdout",
2157
+ "text": [
2158
+ "Note: tokenizer.model not found (this is OK for non-SentencePiece models)\n"
2159
+ ]
2160
+ },
2161
+ {
2162
+ "output_type": "stream",
2163
+ "name": "stderr",
2164
+ "text": [
2165
+ "Unsloth: Merging weights into 16bit: 100%|██████████| 4/4 [12:22<00:00, 185.56s/it]\n"
2166
+ ]
2167
+ },
2168
+ {
2169
+ "output_type": "stream",
2170
+ "name": "stdout",
2171
+ "text": [
2172
+ "Unsloth: Merge process complete. Saved to `/content/model`\n",
2173
+ "Unsloth: Converting to GGUF format...\n",
2174
+ "==((====))== Unsloth: Conversion from HF to GGUF information\n",
2175
+ " \\\\ /| [0] Installing llama.cpp might take 3 minutes.\n",
2176
+ "O^O/ \\_/ \\ [1] Converting HF to GGUF f16 might take 3 minutes.\n",
2177
+ "\\ / [2] Converting GGUF f16 to ['f16'] might take 10 minutes each.\n",
2178
+ " \"-____-\" In total, you will have to wait at least 16 minutes.\n",
2179
+ "\n",
2180
+ "Unsloth: Installing llama.cpp. This might take 3 minutes...\n",
2181
+ "Unsloth: Updating system package directories\n",
2182
+ "Unsloth: All required system packages already installed!\n",
2183
+ "Unsloth: Install llama.cpp and building - please wait 1 to 3 minutes\n",
2184
+ "Unsloth: Cloning llama.cpp repository\n",
2185
+ "Unsloth: Install GGUF and other packages\n"
2186
+ ]
2187
+ }
2188
+ ]
2189
+ },
2190
+ {
2191
+ "cell_type": "code",
2192
+ "source": [
2193
+ "%pwd"
2194
+ ],
2195
+ "metadata": {
2196
+ "colab": {
2197
+ "base_uri": "https://localhost:8080/",
2198
+ "height": 35
2199
+ },
2200
+ "id": "oz024lasDTDm",
2201
+ "outputId": "afb7f9b7-e428-457a-bbbd-909b2bbfe26c"
2202
+ },
2203
+ "execution_count": 1,
2204
+ "outputs": [
2205
+ {
2206
+ "output_type": "execute_result",
2207
+ "data": {
2208
+ "text/plain": [
2209
+ "'/content'"
2210
+ ],
2211
+ "application/vnd.google.colaboratory.intrinsic+json": {
2212
+ "type": "string"
2213
+ }
2214
+ },
2215
+ "metadata": {},
2216
+ "execution_count": 1
2217
+ }
2218
+ ]
2219
+ },
2220
+ {
2221
+ "cell_type": "code",
2222
+ "source": [],
2223
+ "metadata": {
2224
+ "id": "FS7sB8u1Dibe"
2225
+ },
2226
+ "execution_count": null,
2227
+ "outputs": []
2228
+ }
2229
+ ]
2230
+ }