pymlex
/

roberta-spanish-cefr

@@ -1401,6 +1401,212 @@
         "trainer.push_to_hub(commit_message=\"Spanish CEFR fine-tuning\")\n",
         "print(repo_id)"
       ]
     }
   ],
   "metadata": {

         "trainer.push_to_hub(commit_message=\"Spanish CEFR fine-tuning\")\n",
         "print(repo_id)"
       ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Inference"
+      ],
+      "metadata": {
+        "id": "BDGpYPNLZXba"
+      },
+      "id": "BDGpYPNLZXba"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
+        "import torch\n",
+        "\n",
+        "# Checkpoint identifier handed to both `from_pretrained` calls below.\n",
+        "model_id = \"pymlex/roberta-spanish-cefr\"\n",
+        "\n",
+        "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
+        "# `eval()` returns the module itself, so loading the classifier and\n",
+        "# switching it to inference mode can be done in one expression.\n",
+        "model = AutoModelForSequenceClassification.from_pretrained(model_id).eval()\n",
+        "\n",
+        "def predict_cefr(text, top_k=3):\n",
+        "    \"\"\"Return the most probable CEFR labels for a single text.\n",
+        "\n",
+        "    Uses the module-level `tokenizer` and `model`.\n",
+        "\n",
+        "    Args:\n",
+        "        text: Text to classify. It is tokenized with truncation at 512 tokens.\n",
+        "        top_k: Maximum number of labels to return; capped at the number of\n",
+        "            classes the model outputs.\n",
+        "\n",
+        "    Returns:\n",
+        "        A list of {\"label\": str, \"score\": float} dicts, one per selected\n",
+        "        class, in the order produced by `torch.topk` (highest score first).\n",
+        "    \"\"\"\n",
+        "    # Move the encoded batch to the model's device so the call keeps working\n",
+        "    # after the model has been moved off the CPU (e.g. `model.to(\"cuda\")`).\n",
+        "    inputs = tokenizer(\n",
+        "        text,\n",
+        "        return_tensors=\"pt\",\n",
+        "        truncation=True,\n",
+        "        max_length=512,\n",
+        "    ).to(model.device)\n",
+        "\n",
+        "    with torch.no_grad():\n",
+        "        logits = model(**inputs).logits\n",
+        "        # Only the first row of the batch is scored.\n",
+        "        probs = torch.softmax(logits, dim=-1)[0]\n",
+        "\n",
+        "    # Never ask for more entries than there are classes.\n",
+        "    k = min(top_k, probs.numel())\n",
+        "    values, indices = torch.topk(probs, k=k)\n",
+        "\n",
+        "    # `tolist()` yields plain Python ints/floats, matching the id2label keys\n",
+        "    # and giving JSON-friendly scores.\n",
+        "    return [\n",
+        "        {\"label\": model.config.id2label[class_id], \"score\": score}\n",
+        "        for class_id, score in zip(indices.tolist(), values.tolist())\n",
+        "    ]\n",
+        "\n",
+        "text = \"Estimados se\u00f1ores, les escribo para solicitar informaci\u00f3n sobre el curso.\"\n",
+        "print(predict_cefr(text, top_k=3))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 318,
+          "referenced_widgets": [
+            "81146c5153fc4c599ffa4210f49315f9",
+            "30b7c08c82e642e68c324fa37d2273e1",
+            "c6df9d43f7f944e3a731aa794561928e",
+            "3eb95943e51849309a17ddca78155da8",
+            "0b0cd91951cc432f8cb3a3e5399914d8",
+            "fcb1dad83c224c068787027e8bcfd398",
+            "d9297a6d9f174cc99c0c407f5196d2ec",
+            "81fb07e5833548f695df1d4421253990",
+            "706c0a5d14e34471b28e2b98dcc9126f",
+            "cadf5dd05b614d5b962ea953ac9da959",
+            "c68665f850b74a2bb710d3de0ba92087",
+            "3f00dc74734a498ca37b21b5d514d576",
+            "d82869ad687a43eabb113ce699cbf369",
+            "814d90db9746490a825f092523c3a4b9",
+            "c9b0001778274587a251eddd7ecc342f",
+            "939f139ca1054dffa12b1d0a46e1aee4",
+            "4845ee644faf400b94c89ff9d036efd0",
+            "84aa6f541feb4a848b1ee256a793f56a",
+            "4a2b76e52f4046bc84da2fcafdba6bf1",
+            "41c4b4dbf9684298b13e25ad627713bf",
+            "44eff00a96c44e34bf5c8864b11c7dbf",
+            "8e8c0db7bc054c1f8aaccd331cbe742f",
+            "380e59a0af0044ee9cc6cb352270f2b5",
+            "ccd972537eff4c48ad7a69741a905d91",
+            "4b59346d2dfe488a974d1d5c8986be68",
+            "9a54680eb4b6453fb6fe09812ce3438d",
+            "e9ca9c53899441e393db33551a35e354",
+            "d7e32c7d91b94b2997b1c2e8ad5ee9a1",
+            "19809cbf199d423483b1608d87b9ac3d",
+            "a717856a82334c2c993cdada3f904de8",
+            "82e8ba74e4ce4ffc8d6e559c12733dc0",
+            "7973daaba8914086b9bc0d67181a8034",
+            "4f6da9c2fc924d83b41092b7bb5643a3",
+            "6f2f6435bc904bb294955df6db6f2d5b",
+            "b23cb1da8901414c8bee3a65bd352c41",
+            "3d33aaff6edc44ea9d5a4db026832f2b",
+            "edc3cb71239e4f7488eaa361ccf34243",
+            "9f39495c04434441b3137a9b36f038a9",
+            "c02c08a573c34adcbbc3d9fffea1b0e6",
+            "8df14bad8d9c4a409df3a57b6776cc50",
+            "5f87bb2c926941fba9dee6fad84dd0d1",
+            "771cdc1db79048ef8cff34bfa7783b0f",
+            "4f06b9da51ac4881a8626fac9f6525a8",
+            "00186b8e015041efbd04588b93d3ea1c",
+            "cd786a95965542309c2e06a30b1ecbc8",
+            "1fbf3b722a624c45857c6eb03cae065b",
+            "2790d0484be342a790239edd60eaa937",
+            "63b8697c268d4d38846bbcba1e9a7b0e",
+            "efd3f3f2b4cb4712a4f7125d45d5c721",
+            "fb49075f8b1d4ea3913ee44b3391ff5b",
+            "2aff4da8c86a4b9c8836579e00501ea6",
+            "583cc33276924a55bf059969fc19b2cb",
+            "e6969631cc954f79b2b8e5cd84546323",
+            "3c9ad6fc90624e1483150e1df7ec71fd",
+            "0b49180ffb1d4e41b6c2e3990cfd6507"
+          ]
+        },
+        "id": "Xkw2MLNdZqct",
+        "outputId": "2cf27c73-5ec5-438e-b23c-6ea35188900a"
+      },
+      "id": "Xkw2MLNdZqct",
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
+            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+            "You will be able to reuse this secret in all of your notebooks.\n",
+            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "config.json: 0.00B [00:00, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "81146c5153fc4c599ffa4210f49315f9"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "tokenizer_config.json:   0%|          | 0.00/377 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "3f00dc74734a498ca37b21b5d514d576"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "tokenizer.json: 0.00B [00:00, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "380e59a0af0044ee9cc6cb352270f2b5"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "6f2f6435bc904bb294955df6db6f2d5b"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "cd786a95965542309c2e06a30b1ecbc8"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[{'label': 'A1', 'score': 0.22886891663074493}, {'label': 'B1', 'score': 0.19498008489608765}, {'label': 'A2', 'score': 0.19106613099575043}]\n"
+          ]
+        }
+      ]
     }
   ],
   "metadata": {