[ce2cbf]: / Entities_NER.ipynb

Download this file

15614 lines (15614 with data), 611.8 kB

{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# HuggingFace Installations"
      ],
      "metadata": {
        "id": "vawMUV8TT2dg"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install datasets\n",
        "!pip install transformers\n",
        "!pip install seqeval"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "6yD-WetFMkwS",
        "outputId": "5880d2ef-ce47-4a34-a522-5d433e689f8a"
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting datasets\n",
            "  Downloading datasets-2.6.1-py3-none-any.whl (441 kB)\n",
            "\u001b[K     |████████████████████████████████| 441 kB 4.9 MB/s \n",
            "\u001b[?25hRequirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n",
            "Collecting huggingface-hub<1.0.0,>=0.2.0\n",
            "  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)\n",
            "\u001b[K     |████████████████████████████████| 163 kB 84.5 MB/s \n",
            "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets) (3.8.3)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n",
            "Collecting responses<0.19\n",
            "  Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
            "Requirement already satisfied: dill<0.3.6 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.5.1)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.3.5)\n",
            "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (2022.8.2)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.13.0)\n",
            "Collecting multiprocess\n",
            "  Downloading multiprocess-0.70.13-py37-none-any.whl (115 kB)\n",
            "\u001b[K     |████████████████████████████████| 115 kB 93.2 MB/s \n",
            "\u001b[?25hRequirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0.1)\n",
            "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.64.1)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.21.6)\n",
            "Collecting xxhash\n",
            "  Downloading xxhash-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n",
            "\u001b[K     |████████████████████████████████| 212 kB 87.9 MB/s \n",
            "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (22.1.0)\n",
            "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.1.1)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (4.1.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.8.1)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (6.0.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.3.1)\n",
            "Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (0.13.0)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.2.0)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (4.0.2)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (3.8.0)\n",
            "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.9)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2022.9.24)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n",
            "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n",
            "  Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)\n",
            "\u001b[K     |████████████████████████████████| 127 kB 91.7 MB/s \n",
            "\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.9.0)\n",
            "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2022.4)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n",
            "Installing collected packages: urllib3, xxhash, responses, multiprocess, huggingface-hub, datasets\n",
            "  Attempting uninstall: urllib3\n",
            "    Found existing installation: urllib3 1.24.3\n",
            "    Uninstalling urllib3-1.24.3:\n",
            "      Successfully uninstalled urllib3-1.24.3\n",
            "Successfully installed datasets-2.6.1 huggingface-hub-0.10.1 multiprocess-0.70.13 responses-0.18.0 urllib3-1.25.11 xxhash-3.1.0\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting transformers\n",
            "  Downloading transformers-4.23.1-py3-none-any.whl (5.3 MB)\n",
            "\u001b[K     |████████████████████████████████| 5.3 MB 5.0 MB/s \n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.8.0)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.13.0)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.64.1)\n",
            "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n",
            "  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)\n",
            "\u001b[K     |████████████████████████████████| 7.6 MB 71.4 MB/s \n",
            "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.6)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.10.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2022.6.2)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.10.0->transformers) (4.1.1)\n",
            "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.9)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.9.0)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.25.11)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2022.9.24)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
            "Installing collected packages: tokenizers, transformers\n",
            "Successfully installed tokenizers-0.13.1 transformers-4.23.1\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting seqeval\n",
            "  Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
            "\u001b[K     |████████████████████████████████| 43 kB 1.4 MB/s \n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from seqeval) (1.21.6)\n",
            "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (1.0.2)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.1.0)\n",
            "Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.7.3)\n",
            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.2.0)\n",
            "Building wheels for collected packages: seqeval\n",
            "  Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16180 sha256=23a3a3ae907d4e838c06fc140623ceabf5f6a01792b4d0fc8b0bc856e17c3e53\n",
            "  Stored in directory: /root/.cache/pip/wheels/05/96/ee/7cac4e74f3b19e3158dce26a20a1c86b3533c43ec72a549fd7\n",
            "Successfully built seqeval\n",
            "Installing collected packages: seqeval\n",
            "Successfully installed seqeval-1.2.2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 49,
      "metadata": {
        "id": "2LEFwSmbKpLP"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import spacy\n",
        "import tqdm\n",
        "import sys\n",
        "from datasets import Dataset, DatasetDict\n",
        "from transformers import Trainer\n",
        "from transformers import AutoModelForTokenClassification\n",
        "from transformers import AutoTokenizer\n",
        "from transformers import TrainingArguments\n",
        "from transformers import DataCollatorForTokenClassification\n",
        "from datasets import load_metric\n",
        "from transformers import pipeline\n",
        "from transformers import EarlyStoppingCallback, IntervalStrategy"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "G7DLsSK319zN",
        "outputId": "6d759f8e-521e-4fcb-c859-0c4d4f620e58"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%cd /content/drive/MyDrive/IRE"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "czuvYXJp17x-",
        "outputId": "c5fa02b9-9703-4ba6-8fe6-baa9c367f653"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/drive/MyDrive/IRE\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "JP_EFEuNKpLT"
      },
      "source": [
        "# Loading Data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "V-_Zs-ZGKpLV",
        "outputId": "aea55c01-65dd-4e58-916a-92e83b90ee95"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                       filename mark       label  offset1  offset2  \\\n",
              "0  es-S0212-71992007000100007-1   T1  ENFERMEDAD       40       61   \n",
              "1  es-S0212-71992007000100007-1   T2  ENFERMEDAD       66       79   \n",
              "2  es-S0212-71992007000100007-1   T3  ENFERMEDAD     1682     1698   \n",
              "3  es-S0212-71992007000100007-1   T4  ENFERMEDAD     1859     1875   \n",
              "4  es-S0212-71992007000100007-1   T5  ENFERMEDAD     1626     1648   \n",
              "\n",
              "                     span      code  \n",
              "0   arterial hypertension  38341003  \n",
              "1           polyarthrosis  36186002  \n",
              "2        pleural effusion  60046008  \n",
              "3        pleural effusion  60046008  \n",
              "4  lower lobe atelectasis  46621007  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-d46a4230-7e8d-49af-b801-224b9699bd0e\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>filename</th>\n",
              "      <th>mark</th>\n",
              "      <th>label</th>\n",
              "      <th>offset1</th>\n",
              "      <th>offset2</th>\n",
              "      <th>span</th>\n",
              "      <th>code</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>es-S0212-71992007000100007-1</td>\n",
              "      <td>T1</td>\n",
              "      <td>ENFERMEDAD</td>\n",
              "      <td>40</td>\n",
              "      <td>61</td>\n",
              "      <td>arterial hypertension</td>\n",
              "      <td>38341003</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>es-S0212-71992007000100007-1</td>\n",
              "      <td>T2</td>\n",
              "      <td>ENFERMEDAD</td>\n",
              "      <td>66</td>\n",
              "      <td>79</td>\n",
              "      <td>polyarthrosis</td>\n",
              "      <td>36186002</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>es-S0212-71992007000100007-1</td>\n",
              "      <td>T3</td>\n",
              "      <td>ENFERMEDAD</td>\n",
              "      <td>1682</td>\n",
              "      <td>1698</td>\n",
              "      <td>pleural effusion</td>\n",
              "      <td>60046008</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>es-S0212-71992007000100007-1</td>\n",
              "      <td>T4</td>\n",
              "      <td>ENFERMEDAD</td>\n",
              "      <td>1859</td>\n",
              "      <td>1875</td>\n",
              "      <td>pleural effusion</td>\n",
              "      <td>60046008</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>es-S0212-71992007000100007-1</td>\n",
              "      <td>T5</td>\n",
              "      <td>ENFERMEDAD</td>\n",
              "      <td>1626</td>\n",
              "      <td>1648</td>\n",
              "      <td>lower lobe atelectasis</td>\n",
              "      <td>46621007</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d46a4230-7e8d-49af-b801-224b9699bd0e')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-d46a4230-7e8d-49af-b801-224b9699bd0e button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-d46a4230-7e8d-49af-b801-224b9699bd0e');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 5
        }
      ],
      "source": [
        "entities = pd.read_csv(\"data/entities.tsv\", delimiter=\"\\t\")\n",
        "entities.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "9ZrFWsjmKpLW"
      },
      "outputs": [],
      "source": [
        "list_off0 = list(entities['offset1'])\n",
        "list_off1 = list(entities['offset2'])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "Mafcxoi4KpLW"
      },
      "outputs": [],
      "source": [
        "text_files_path = \"data/text\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "OFCPFj9SKpLX",
        "outputId": "8946ced4-8b26-49fe-a6e4-6aa3e4c8e16d"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "A 73-year-old patient with a history of arterial hypertension and polyarthrosis presented to the emergency department with abdominal distension and pain associated with constipation and febrile fever. The symptoms had started three weeks earlier and worsened during the four days prior to admission. During this period, an upper gastrointestinal fibroendoscopy (oesophagus, stomach and duodenum) and a colonoscopy (up to the splenic angle) were performed, but no abnormalities were found.\n",
            "\n",
            "Physical examination revealed a low-grade fever (37.6º C), a distended abdomen, diffusely painful on palpation, tympanised on percussion, with scant borborygmi but no evidence of peritonism, pulmonary auscultation with decreased ventilation in the lower half of the right hemithorax and the onset of intense pain on palpation and percussion of the last three dorsal spinous processes.\n",
            "\n",
            "Analyses showed 8.2 x 109 leukocytes / L, haemoglobin 136 g / L, platelets 186 x 109 / L. Except for glycaemia (123 mg/dl), the following laboratory parameters were normal or negative: urea, creatinine, bilirubin, transaminases, gamma-glutamyltranspeptidase, sodium, potassium, chlorine, calcium, phosphorus, creatine phosphokinase, amylase, lactate dehydrogenase (LDH), proteinogram, immunoglobulin dosage, alpha-fetoprotein, CA 19 antigens. 9 and CA 125 antigens, as well as general urinalysis. ESR and C-reactive protein were elevated, with values of 85 mm / 1 h and 133 mg / L (normal < 5 mg / L), respectively. Mantoux intradermal reaction (10 IU RT-23) was positive, with an induration of 25 mm. Chest X-ray showed an image compatible with right lower lobe atelectasis in the context of an ipsilateral pleural effusion. There were no signs suggestive of adenopathy or alterations in the cardiopericardial silhouette. A thoracoabdominal CT scan confirmed the existence of a right pleural effusion and identified prominent degenerative changes along the dorsolumbar spine but, above all, erosions in the vertebral plates adjacent to the D10-D11 disc space. A lumbar MRI showed hyposignal on T1-weighted sequences and hypersignal on T2-weighted sequences in these vertebrae and their corresponding disc, with morphological alterations typical of infectious spondylodiscitis D10-D11. Three serial blood cultures were negative. Samples obtained by aspiration of the D10-D11 space showed gram-positive cocci chains, which were subsequently recovered and typed as penicillin-sensitive Streptococcus pneumoniae. Pleural fluid analysis showed pH: 7.55; leucocytes: 8.4 x 109/L (58% neutrophils, 26% eosinophils, 16% lymphocytes), protein: 48 g/L (ratio to serum protein: 0.65), glucose: 125 mg/dl, ADA: 25.92 IU/ml, LDH: 362 U/L (pleural LDH/serum LDH ratio: 0.8). Both auramine-rhodamine staining and Löwenstein-Jensen medium culture of pleural fluid were negative and cytology showed no evidence of neoplastic cells.\n",
            "\n",
            "\n",
            "\n",
            "The patient was initially treated intravenously with amoxicillin + clavulanic acid (1 g / 200 mg, every 8 hours). After 21 days, she was switched to the oral route (875 / 125 mg, every 8 hours) for 6 weeks. The evolution was favourable and she was able to start walking with a dorsolumbar corset after the fourth week. One month after the end of antibiotic therapy, a control chest CT scan still showed a discrete pleural effusion, but the patient had only mild mechanical dorsalgia, her ESR had decreased to 21 mm / 1 h and her CRP was 2.4 mg/L. Outpatient follow-up continued for a further three years, during which time the evolution was favourable and a D10-D11 vertebral block was formed.\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "f = open(text_files_path + \"/\" + entities.iloc[1,0] + \".txt\", \"r\", encoding=\"UTF-8\")\n",
        "for l in f:\n",
        "  print(l)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "UeML6HJmKpLX",
        "outputId": "de8c52d4-49f7-441b-89af-cce4183b18e9"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 6650/6650 [01:51<00:00, 59.70it/s]\n"
          ]
        }
      ],
      "source": [
        "#Clinical cases\n",
        "HCs = {}\n",
        "for fid in tqdm.tqdm(range(len(entities[\"filename\"]))):\n",
        "  fname = entities[\"filename\"][fid]\n",
        "  with open(text_files_path + \"/\" + fname + \".txt\", \"r\", encoding=\"UTF-8\") as f:\n",
        "    HCs.update({fname: f.read()})"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "nivRpUrBKpLY"
      },
      "outputs": [],
      "source": [
        "#Diseases\n",
        "ENF = {}\n",
        "enfermedades = []\n",
        "fn = entities[\"filename\"][0]\n",
        "for fname, enf in zip(entities[\"filename\"], entities[\"span\"]):\n",
        "    if fname!=fn:\n",
        "      enfermedades = []\n",
        "    enfermedades.append(enf)\n",
        "    ENF.update({fname: enfermedades})\n",
        "    fn = fname"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "nYI19JFtKpLY",
        "outputId": "68cd794c-7b51-488f-ccf8-af9e574794cb"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "741"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ],
      "source": [
        "len(ENF)"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Preprocessing"
      ],
      "metadata": {
        "id": "qviw-SCgUDwK"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!python -m spacy download en_core_web_sm"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xtkwMJhYNP17",
        "outputId": "4c9fdacb-c17f-4bef-909d-fe6b1117dc06"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting en-core-web-sm==3.4.1\n",
            "  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl (12.8 MB)\n",
            "\u001b[K     |████████████████████████████████| 12.8 MB 2.6 MB/s \n",
            "\u001b[?25hRequirement already satisfied: spacy<3.5.0,>=3.4.0 in /usr/local/lib/python3.7/dist-packages (from en-core-web-sm==3.4.1) (3.4.1)\n",
            "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.0.8)\n",
            "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (4.64.1)\n",
            "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.0.7)\n",
            "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.4.4)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (57.4.0)\n",
            "Requirement already satisfied: pathy>=0.3.5 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.6.2)\n",
            "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.23.0)\n",
            "Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (8.1.4)\n",
            "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.9 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.10)\n",
            "Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.10.1)\n",
            "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.8)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.11.3)\n",
            "Requirement already satisfied: typing-extensions<4.2.0,>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (4.1.1)\n",
            "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.3.0)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (21.3)\n",
            "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.0.9)\n",
            "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.10.0,>=1.7.4 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.9.2)\n",
            "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.0.3)\n",
            "Requirement already satisfied: typer<0.5.0,>=0.3.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.4.2)\n",
            "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.21.6)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from catalogue<2.1.0,>=2.0.6->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.9.0)\n",
            "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.9)\n",
            "Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /usr/local/lib/python3.7/dist-packages (from pathy>=0.3.5->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (5.2.1)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.10)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2022.9.24)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.25.11)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.4)\n",
            "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.7/dist-packages (from thinc<8.2.0,>=8.1.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.0.3)\n",
            "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.7/dist-packages (from thinc<8.2.0,>=8.1.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.7.8)\n",
            "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.7/dist-packages (from typer<0.5.0,>=0.3.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (7.1.2)\n",
            "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.0.1)\n",
            "Installing collected packages: en-core-web-sm\n",
            "  Attempting uninstall: en-core-web-sm\n",
            "    Found existing installation: en-core-web-sm 3.4.0\n",
            "    Uninstalling en-core-web-sm-3.4.0:\n",
            "      Successfully uninstalled en-core-web-sm-3.4.0\n",
            "Successfully installed en-core-web-sm-3.4.1\n",
            "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
            "You can now load the package via spacy.load('en_core_web_sm')\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "id": "6at1DC6zKpLZ"
      },
      "outputs": [],
      "source": [
        "nlp = spacy.load(\"en_core_web_sm\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "id": "r-OrghwTKpLZ"
      },
      "outputs": [],
      "source": [
        "HCs_tokenized = []\n",
        "for hc in HCs:\n",
        "    hl = []\n",
        "    tokens = nlp(HCs[hc])\n",
        "    #tokens = HCs[hc].split(\" \") #The simplest option\n",
        "    for t in tokens:\n",
        "        hl.append(str(t))\n",
        "    HCs_tokenized.append(hl)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "c4Nh9SJ9KpLa",
        "outputId": "4cf4dd3e-e57b-4f52-98d3-ba2fc55632ab"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "741"
            ]
          },
          "metadata": {},
          "execution_count": 15
        }
      ],
      "source": [
        "len(HCs_tokenized)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 16,
      "metadata": {
        "id": "mJkuhsnTKpLa"
      },
      "outputs": [],
      "source": [
        "Ent_tokenized = []\n",
        "for enf in ENF:\n",
        "    Tks = []\n",
        "    for e in ENF[enf]:\n",
        "      sl = []\n",
        "      tokens = nlp(e)\n",
        "      #tokens = e.split(\" \")\n",
        "      for t in tokens:\n",
        "          sl.append(str(t))\n",
        "      Tks.append(sl)\n",
        "    Ent_tokenized.append(Tks)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "195X74buKpLa",
        "outputId": "ad159ed3-e174-4048-9d30-9a92e94dd653"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "741"
            ]
          },
          "metadata": {},
          "execution_count": 17
        }
      ],
      "source": [
        "len(Ent_tokenized)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mtLgqICDKpLb",
        "outputId": "d6befaf5-1174-4e9e-8cb1-768df6a768ac"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[['arterial', 'hypertension'],\n",
              " ['polyarthrosis'],\n",
              " ['pleural', 'effusion'],\n",
              " ['pleural', 'effusion'],\n",
              " ['lower', 'lobe', 'atelectasis'],\n",
              " ['infectious', 'spondylodiscitis', 'D10', '-', 'D11'],\n",
              " ['pleural', 'effusion']]"
            ]
          },
          "metadata": {},
          "execution_count": 18
        }
      ],
      "source": [
        "Ent_tokenized[0]"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Tagging Data with BIO scheme"
      ],
      "metadata": {
        "id": "ZapndudTUQvP"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {
        "id": "O94wXbu4KpLb"
      },
      "outputs": [],
      "source": [
        "def find_idx(list_to_check, item_to_find):\n",
        "    indices = []\n",
        "    for idx, value in enumerate(list_to_check):\n",
        "        if value == item_to_find:\n",
        "            indices.append(idx)\n",
        "    return indices"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 20,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xy-dmZodKpLb",
        "outputId": "d34b67ea-5d01-4120-9cdb-4801dc7a8108"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['A', '43', '-', 'year', '-', 'old', 'man', 'was', 'admitted', 'to', 'the', 'emergency', 'department', 'due', 'to', 'sudden', 'left', 'lumbar', 'pain', ',', 'continuous', 'and', 'incapacitating', ',', 'without', 'antalgic', 'position', 'or', 'aggravating', 'factors', ',', 'without', 'irradiation', ',', 'with', 'approximately', '23', 'hours', 'of', 'evolution', '.', 'No', 'nausea', 'or', 'vomiting', ',', 'no', 'macroscopic', 'haematuria', 'or', 'lower', 'urinary', 'tract', 'discomfort', '.', 'Absence', 'of', 'precordial', 'pain', '.', 'Hypertension', 'controlled', 'with', 'verapamil', '.', 'He', 'reported', 'an', 'episode', ',', 'interpreted', 'as', 'a', 'transient', 'ischaemic', 'attack', ',', 'approximately', 'eight', 'weeks', 'earlier', '(', 'not', 'confirmed', ')', '.', 'No', 'history', 'of', 'cardiac', 'arrhythmia', 'or', 'valvular', 'heart', 'disease', '.', 'No', 'other', 'previous', 'thromboembolic', 'episodes', '.', 'No', 'known', 'history', 'of', 'urinary', 'lithiasis', '.', 'No', 'osteoarticular', 'or', 'respiratory', 'complaints', '.', 'No', 'cocaine', 'abuse', '.', 'No', 'history', 'of', 'hepatitis', 'B', 'or', 'C.', 'Medicated', 'with', '160', 'mg', '/', 'day', 'of', 'verapamil', '.', '\\n', 'Physical', 'examination', 'showed', 'diaphoretic', ',', 'BP', '150', '/', '110', 'mmHg', ',', '80', 'beats', 'per', 'minute', ',', 'rhythmic', 'and', 'wide', '.', 'Temperature', '37.8', 'ºC.', 'Abdomen', 'painful', 'on', 'deep', 'palpation', 'in', 'the', 'left', 'iliac', 'fossa', 'and', 'flank', ',', 'with', 'defence', ',', 'with', 'no', 'signs', 'of', 'peritoneal', 'irritation', '.', 'Decreased', 'RHA', '.', 'No', 'abdominal', 'murmurs', '.', 'Negative', 'bilateral', 'renal', 'Murphy', '.', 'Existence', 'of', 'symmetrical', 'arterial', 'pulses', '.', 'No', 'perfusion', 'deficit', 'in', 'the', 'extremities', '.', 'General', 'neurological', 'examination', 'without', 'alterations', '.', '\\n', 'Renal', 'ultrasound', 'showed', 'no', 'abnormalities', ',', 'especially', 'dilatation', 'of', 'the', 'urinary', 'tract', '.', 'Laboratory', 'tests', ':', 'Hb15.6', 'g', '/', 'dL', ',', 'Leuc', '13,800/µL', ',', 'Neut', '76.1', '%', ',', 'Creat', '1.4', 'mg', '/', 'dL', ',', 'TGO', '104', 'UI', '/', 'L', ',', 'TGP', '74', 'UI', '/', 'L', ',', 'LDH', '1,890', 'UI', '/', 'L.', 'Coagulation', 'parameters', 'showed', 'no', 'alterations', '.', 'The', 'ECG', 'showed', 'sinus', 'rhythm', ',', 'with', 'no', 'alterations', 'compatible', 'with', 'acute', 'myocardial', 'ischaemia', '.', 'An', 'abdominal', 'and', 'pelvic', 'CT', 'scan', 'was', 'requested', ',', 'which', 'showed', 'the', 'presence', 'of', 'multiple', 'areas', 'without', 'contrast', 'uptake', 'in', 'the', 'left', 'kidney', ',', 'without', 'morpho', '-', 'structural', 'alterations', ',', 'compatible', 'with', 'multifocal', 'areas', 'of', 'ischaemia', ',', 'with', 'multisegmental', 'distribution', ',', 'probably', 'of', 'embolic', 'aetiology', '.', 'No', 'aortic', 'dilatation', 'or', 'renal', 'artery', 'aneurysm', '.', 'No', 'intra', '-', 'peritoneal', 'alterations', '.', 'Taking', 'into', 'account', 'the', 'multi', '-', 'segmental', 'distribution', 'of', 'the', 'ischaemic', 'process', 'and', 'the', 'duration', 'of', 'discomfort', ',', 'we', 'decided', 'that', 'there', 'was', 'no', 'indication', 'for', 'invasive', 'manoeuvres', '.', 'The', 'patient', 'underwent', 'systemic', 'hypo', '-', 'coagulation', 'with', 'heparin', 'in', 'an', 'attempt', 'to', 'avoid', 'future', 'embolic', 'episodes', 'and', 'appropriate', 'analgesia', '.', '\\n\\n', 'He', 'then', 'underwent', 'multiple', 'examinations', 'in', 'an', 'attempt', 'to', 'identify', 'an', 'embolic', 'focus', 'and', 'the', 'aetiological', 'process', '.', 'Echocardiography', 'ruled', 'out', 'pathology', 'of', 'the', 'cardiac', 'valvular', 'apparatus', 'or', 'the', 'existence', 'of', 'valvular', 'vegetations', '.', 'Absence', 'of', 'areas', 'of', 'myocardial', 'dyskinesia', '.', 'Arteriography', 'showed', 'a', 'perfusion', 'deficit', 'of', 'the', 'lower', 'pole', 'of', 'the', 'left', 'kidney', 'with', 'multiple', 'other', 'less', 'prominent', 'areas', 'showing', 'perfusion', 'deficits', 'as', 'well', '.', 'No', 'other', 'alterations', 'such', 'as', 'macro', '/', 'microaneurysms', 'or', 'alterations', 'of', 'the', 'main', 'renal', 'artery', 'or', 'aorta', 'were', 'detected', '.', '\\n', 'The', 'study', 'to', 'rule', 'out', 'prothrombotic', 'and', 'vascular', 'disease', '(', 'lupus', 'anticoagulant', ',', 'anti', '-', 'cardiolipin', ',', 'ANCA', \"'s\", ',', 'detection', 'of', 'cryoglobulins', ',', 'ANA', \"'s\", ',', 'determination', 'of', 'immunoglobulins', 'and', 'complement', 'fractions', ')', 'was', 'negative', '.', '\\n', 'After', '15', 'months', 'of', 'follow', '-', 'up', ',', 'we', 'were', 'left', 'without', 'an', 'aetiological', 'diagnosis', '.', 'The', 'patient', 'remains', 'asymptomatic', ',', 'with', 'no', 'new', 'episodes', 'of', 'embolism', 'or', 'manifestations', 'of', 'systemic', 'disease', '.', 'Anti', '-', 'coagulation', 'therapy', 'has', 'been', 'discontinued', '.', 'He', 'maintains', 'controlled', 'hypertension', 'with', 'the', 'same', 'dose', 'of', 'verapamil', '.', 'The', 'last', 'analytical', 'control', 'had', 'a', 'serum', 'creatinine', 'of', '1.2', 'mg', '/', 'dL', ',', 'and', 'GFR', 'of', '93', 'ml', '/', 'min', '.', 'The', 'follow', '-', 'up', 'kinillogram', 'shows', 'a', 'functional', 'deficit', 'of', 'the', 'affected', 'renal', 'unit', '(', 'differential', 'function', '41', '%', ')', '.', '\\n\\n']\n",
            "[['transient', 'ischaemic', 'attack'], ['cardiac', 'arrhythmia'], ['valvular', 'heart', 'disease'], ['thromboembolic', 'episodes'], ['urinary', 'lithiasis'], ['cocaine', 'abuse'], ['hepatitis', 'B', 'or', 'C'], ['Hypertension'], ['acute', 'myocardial', 'ischaemia'], ['aortic', 'dilatation'], ['renal', 'artery', 'aneurysm'], ['ischaemia'], ['ischaemia'], ['valvular', 'vegetations'], ['myocardial', 'dyskinesia'], ['macro', '/', 'microaneurysms', 'or', 'alterations', 'of', 'the', 'main', 'renal', 'artery', 'or', 'aorta'], ['prothrombotic', 'and', 'vascular', 'disease'], ['hypertension'], ['embolism'], ['systemic', 'disease']]\n",
            "['hepatitis', 'B', 'or', 'C']\n",
            "C\n",
            "27\n",
            "['A', 'four', '-', 'month', '-', 'old', 'boy', 'was', 'admitted', 'with', 'vomiting', ',', 'diarrhoea', 'and', 'severe', 'weight', 'and', 'body', 'weight', 'delay', '.', 'He', 'presented', 'hypotonia', ',', 'bilateral', 'convergent', 'strabismus', 'and', 'inverted', 'nipples', '.', 'Healthy', ',', 'non', '-', 'consanguineous', 'parents', ',', 'normal', 'pregnancy', 'and', 'delivery', '.', 'Since', 'the', 'age', 'of', 'one', 'month', 'she', 'has', 'had', 'poor', 'intake', ',', 'growth', 'failure', 'and', 'vomiting', ',', 'with', 'normal', 'abdominal', 'and', 'transfontanelar', 'ultrasound', ',', 'chest', 'X', '-', 'ray', ',', 'blood', 'tests', 'and', 'urine', 'culture', '.', '\\n\\n', 'On', 'admission', ',', 'rotavirus', 'was', 'detected', 'in', 'stool', '.', 'After', 'resolution', 'of', 'the', 'acute', 'process', ',', 'nutritional', 'support', 'was', 'started', 'with', 'artificial', 'protein', 'hydrolysate', 'formula', ';', 'later', ',', 'the', 'patient', 'was', 'switched', 'to', 'elemental', 'formula', 'as', 'he', 'continued', 'to', 'show', 'poor', 'weight', 'gain', '.', 'The', 'study', 'performed', 'detected', 'hypertransaminemia', ',', 'hypoalbuminemia', ',', 'decreased', 'transferrin', 'and', 'ceruloplasmin', ',', 'with', 'normal', 'hepatotropic', 'virus', 'serology', 'and', 'echocardiography', '.', 'Given', 'the', 'suspicion', 'of', 'a', 'possible', 'inborn', 'error', 'of', 'metabolism', '(', 'IEM', ')', ',', 'a', 'metabolic', 'study', 'was', 'requested', ',', 'while', 'the', 'patient', 'required', 'admission', 'to', 'the', 'Intensive', 'Care', 'Unit', 'due', 'to', 'the', 'onset', 'of', 'symptoms', 'compatible', 'with', 'sepsis', '(', 'fever', ',', 'hypoglycaemia', ',', 'poor', 'general', 'condition', 'and', 'poor', 'colouring', ')', '.', 'A', 'cranial', 'MRI', 'scan', 'was', 'performed', ',', 'which', 'showed', 'cerebellar', 'hypoplasia', '.', 'The', 'result', 'of', 'the', 'metabolic', 'study', 'confirms', 'a', 'congenital', 'protein', 'glycosylation', 'defect', '(', 'CGD', ')', 'type', 'Ia.', '\\n\\n\\n']\n",
            "[['bilateral', 'convergent', 'strabismus'], ['inverted', 'nipples'], ['inborn', 'error', 'of', 'metabolism'], ['IEM'], ['sepsis'], ['cerebellar', 'hypoplasia'], ['congenital', 'protein', 'glycosylation', 'defect', '(', 'CGD', ')', 'type', 'Ia'], ['CGD'], ['hypoglycaemia']]\n",
            "['congenital', 'protein', 'glycosylation', 'defect', '(', 'CGD', ')', 'type', 'Ia']\n",
            "Ia\n",
            "205\n",
            "['Patient', 'aged', '53', 'years', 'at', 'the', 'time', 'of', 'diagnosis', 'with', 'a', 'personal', 'history', 'of', 'a', 'caesarean', 'section', ',', 'arterial', 'hypertension', 'and', 'tachycardia', 'treated', 'with', 'Atenolol', ',', 'who', 'came', 'to', 'the', 'Emergency', 'Department', 'with', 'a', '5', '-', 'month', 'history', 'of', 'progressive', 'dull', 'pain', 'in', 'the', 'left', 'flank', 'and', 'microhaematuria', '.', 'Ultrasound', 'and', 'then', 'abdominal', '-', 'pelvic', 'computerised', 'axial', 'tomography', '(', 'CAT', ')', 'scans', 'revealed', 'a', 'large', 'solid', 'mass', 'measuring', '20x16x13', 'cm', 'arising', 'from', 'the', 'left', 'renal', 'pole', ',', 'without', 'associated', 'venous', 'thrombosis', '.', 'Para', '-', 'aortic', 'adenopathies', 'were', 'also', 'observed', '.', 'All', 'this', 'was', 'compatible', 'with', 'a', 'left', 'renal', 'tumour', '.', '\\n\\n', 'In', 'view', 'of', 'this', 'finding', ',', 'an', 'extension', 'study', 'was', 'carried', 'out', 'with', 'a', 'general', 'analysis', 'showing', 'mild', 'iron', '-', 'deficiency', 'microcytic', 'anaemia', 'and', 'hyperuricaemia', ',', 'a', 'normal', 'chest', 'X', '-', 'ray', ',', 'a', 'bone', 'scan', 'showing', 'an', 'image', 'with', 'a', 'slight', 'increase', 'in', 'tracer', 'uptake', 'corresponding', 'to', 'soft', 'tissue', ',', 'with', 'a', 'rounded', 'morphology', ',', 'located', 'in', 'the', 'hypochondrium', 'and', 'in', 'the', 'hypochondrium', ',', 'located', 'in', 'the', 'left', 'hypochondrium', 'and', 'void', ',', 'exceeding', 'the', 'midline', ',', 'and', '3', 'foci', 'of', 'tracer', 'uptake', ',', 'one', 'in', 'the', 'left', 'iliac', 'blade', ',', 'the', 'second', 'in', 'the', 'left', 'hemivertebrae', 'L4', 'and', 'L5', 'and', 'the', 'third', 'at', 'the', 'level', 'of', 'the', 'right', 'hemivertebra', 'L2', ',', 'which', 'could', 'correspond', 'to', 'bone', 'involvement', 'due', 'to', 'contiguity', 'or', 'be', 'metastatic', ',', 'and', 'an', 'abdominal', 'MRI', 'scan', 'which', 'confirms', 'the', 'findings', 'of', 'the', 'CT', 'scan', '.', '\\n', 'With', 'the', 'presumptive', 'diagnosis', 'of', 'renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy', ',', 'the', 'patient', 'underwent', 'radical', 'left', 'nephrectomy', ',', 'removal', 'of', 'all', 'the', 'pararenal', 'fat', 'and', 'the', 'left', 'adrenal', 'gland', 'and', 'para', '-', 'aortic', 'and', 'interaortocaval', 'lymphadenectomy', 'from', 'the', 'renal', 'artery', 'to', '3', 'cm', 'from', 'the', 'common', 'iliac', 'artery', '.', 'Removal', 'of', 'the', 'mass', 'was', 'difficult', 'due', 'to', 'infiltration', 'of', 'the', 'transverse', 'mesocolon', 'and', 'tail', 'of', 'the', 'pancreas', ',', 'which', 'were', 'released', ',', 'leaving', 'the', 'mesocolon', 'untouched', '.', 'The', 'anatomo', '-', 'pathological', '(', 'A.P.', ')', 'result', 'was', 'as', 'follows', ':', 'Collision', 'renal', 'tumour', '(', 'Leiomyosarcoma', '(', '21', 'x', '15', 'cm', ')', 'and', 'renal', 'carcinoma', 'papillary', 'type', 'nuclear', 'grade', '3', '(', '7', 'x', '3.5', 'cm', ')', ')', '.', 'The', 'weight', 'of', 'the', 'whole', 'specimen', 'was', '2539', 'grams', '.', 'The', 'tumour', 'was', 'in', 'contact', 'with', 'the', 'surgical', 'edge', 'in', 'most', 'areas', '.', 'The', 'renal', 'parenchyma', 'was', 'microscopically', 'respected', 'and', 'no', 'tumour', 'infiltration', 'was', 'observed', '.', 'The', 'ureteral', 'fragment', 'and', 'renal', 'hilum', 'were', 'free', 'of', 'tumour', 'infiltration', '.', 'The', 'immunophenotypic', 'profile', 'of', 'the', 'tumour', 'was', 'as', 'follows', ':', 'Actin', ',', 'Desmin', ',', 'S-100', ',', 'Synaptofusin', 'and', 'CD', '56', 'and', 'c', '-', 'kit', 'negative', ';', 'Smooth', 'muscle', 'actin', 'positive', 'in', 'the', 'sarcomatous', 'zone', 'and', 'keratin', 'cocktail', '(', 'E1', ',', 'E3', ')', 'positive', 'in', 'the', 'carcinomatous', 'zone', '.', 'At', 'the', 'level', 'of', 'the', 'para', '-', 'aortic', 'chain', ',', '16', 'adenopathies', 'were', 'isolated', ',', 'the', 'largest', 'measuring', '2.5', 'cm', ',', 'with', 'metastases', 'in', '14', 'of', 'them', ',', '13', 'from', 'the', 'carcinoma', 'and', '1', 'with', 'mixed', 'metastases', '(', 'sarcoma+carcinoma', ')', '.', 'Six', 'adenopathies', 'were', 'isolated', 'in', 'the', 'interaortocaval', 'chain', ',', 'the', 'largest', 'measuring', '1.4', 'cm', ',', 'three', 'of', 'which', 'were', 'carcinoma', 'metastases', '.', 'The', 'mesocolic', 'bed', 'was', 'infiltrated', 'by', 'leiomyosarcoma', '.', 'In', 'the', 'perisuprarenal', 'adipose', 'tissue', '4', 'adenopathies', 'were', 'isolated', ',', '3', 'of', 'them', 'with', 'metastasis', 'of', 'the', 'carcinoma', 'and', 'another', 'with', 'mixed', 'metastasis', '(', 'carcinoma+sarcoma', ')', '.', 'The', 'left', 'adrenal', 'gland', ',', 'the', 'perirenal', 'fat', 'and', 'the', 'gall', 'bladder', 'showed', 'no', 'tumour', 'elements', '.', '\\n\\n', 'We', 'were', 'therefore', 'faced', 'with', 'a', 'renal', 'collision', 'tumour', 'consisting', 'of', 'a', 'stage', 'IV', 'papillary', 'type', 'renal', 'cancer', '(', 'pT3', '-', '4pN2', ')', 'according', 'to', 'the', 'TNM', 'classification', 'and', 'a', 'stage', 'IV', 'renal', 'leiomyosarcoma', '(', 'pT2bpN1', ')', 'according', 'to', 'the', 'AJCC', 'classification', ',', 'not', 'radically', 'resected', 'and', 'with', 'possible', 'bone', 'metastases', 'according', 'to', 'bone', 'scintigraphy', '.', '\\n', 'The', 'postoperative', 'period', 'was', 'uneventful', 'and', 'the', 'patient', 'was', 'referred', 'to', 'the', 'Medical', 'Oncology', 'Department', '.', 'It', 'was', 'decided', 'to', 'propose', 'complementary', 'chemotherapy', 'treatment', 'with', 'Ifosfamide', '5', 'g', '/', 'm2', 'in', 'a', 'continuous', 'infusion', 'of', '24h', 'x', '1', 'day', '+', 'Adriamycin', '60', 'mg', '/', 'm2', 'x', '1', 'day/21', 'days', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'A', 'CT', 'scan', 'was', 'previously', 'requested', 'in', 'which', 'a', 'small', 'soft', 'tissue', 'enlargement', 'was', 'observed', 'behind', 'the', 'pancreatic', 'tail', 'and', 'renal', 'bed', ',', 'which', 'could', 'be', 'compatible', 'with', 'present', 'disease', '.', '\\n', 'The', 'patient', 'started', 'treatment', 'according', 'to', 'the', 'planned', 'schedule', '3', 'weeks', 'after', 'surgery', '.', 'She', 'received', 'a', 'total', 'of', '6', 'cycles', 'with', 'good', 'clinical', 'tolerance', '.', 'After', 'the', '4th', 'cycle', ',', 'an', 'abdominal', 'CAT', 'scan', 'was', 'performed', ',', 'which', 'was', 'normal', ',', 'and', 'at', 'the', 'end', 'of', 'the', '6th', 'cycle', ',', 'a', 'bone', 'scan', 'was', 'performed', ',', 'which', 'showed', 'no', 'pathological', 'findings', '.', 'The', 'patient', 'underwent', 'regular', 'check', '-', 'ups', 'and', '3', 'months', 'later', 'a', 'chest', 'X', '-', 'ray', 'was', 'performed', 'showing', 'images', 'suggestive', 'of', 'bilateral', 'pulmonary', 'metastases', ',', 'which', 'were', 'confirmed', 'by', 'a', 'CT', 'scan', 'showing', 'multiple', 'bilateral', 'millimetric', 'pulmonary', 'metastases', '.', 'In', 'order', 'to', 'identify', 'the', 'origin', 'of', 'these', 'metastases', ',', 'Thoracic', 'Surgery', 'was', 'consulted', 'and', 'it', 'was', 'decided', 'to', 'perform', 'a', 'left', 'videothoracoscopy', 'with', 'biopsies', '.', 'The', 'P.A.', 'diagnosis', 'was', 'metastasis', 'of', 'poorly', 'differentiated', 'carcinoma', 'with', 'an', 'epithelial', 'component', '(', 'renal', ')', '.', 'In', 'view', 'of', 'this', 'diagnosis', ',', '6', 'months', 'after', 'finishing', 'the', 'first', 'chemotherapy', 'regimen', ',', 'it', 'was', 'decided', 'to', 'start', 'a', 'second', 'line', 'of', 'treatment', 'with', 'a', 'chemotherapy', 'regimen', 'with', 'Gemzitabine', 'and', 'Fluoropyrimidines9', 'that', 'had', 'proved', 'useful', 'in', 'stage', 'IV', 'renal', 'carcinoma', ':', 'Gemcitabine', '1000', 'mg', '/', 'm2', 'days', '1', 'and', '8', '+', 'Capecitabine', '1000', 'mg', '/', 'm2/12h', 'days', '1', '-', '14/21', 'days', ',', 'which', 'the', 'patient', 'accepted', '.', '\\n\\n', 'Treatment', 'was', 'started', 'with', 'a', '20', '%', 'dose', 'reduction', 'which', 'was', 'maintained', 'for', 'the', 'rest', 'of', 'the', 'treatment', 'given', 'the', 'patient', \"'s\", 'general', 'condition', '(', 'ECOG', ':', '1', '-', '2', ')', '.', 'After', 'the', 'second', 'cycle', ',', 'the', 'patient', 'suffered', 'a', 'complication', 'of', 'pulmonary', 'thromboembolism', 'from', 'which', 'she', 'recovered', 'but', 'which', 'caused', 'a', 'delay', 'of', '4', 'weeks', 'in', 'the', 'administration', 'of', 'the', 'third', 'cycle', '.', 'After', '6', 'cycles', 'of', 'treatment', ',', 'which', 'she', 'received', 'with', 'acceptable', 'tolerance', 'except', 'for', 'grade', '4', 'anaemia', ',', 'she', 'was', 're', '-', 'evaluated', 'with', 'a', 'body', 'CT', 'scan', 'which', 'showed', 'persistent', 'pulmonary', 'metastases', 'with', 'the', 'appearance', 'of', 'liver', 'and', 'spleen', 'metastases', 'and', 'local', 'relapse', '.', '\\n', 'In', 'view', 'of', 'this', 'progression', ',', 'treatment', 'with', 'IL-2', 'was', 'proposed', 'for', '6', 'weeks', '(', '1', 'week', 'of', 'induction', 'with', '18', 'Million', 'Units', '(', 'MU', ')', 'x', '5', 'days', 'and', '5', 'weeks', ':', '9', 'MU', 'days', '1', 'and', '2', 'and', '18', 'MU', 'days', '3', 'to', '5)10', '.', 'The', 'patient', 'accepted', 'the', 'treatment', 'with', 'moderate', 'toxicity', 'with', 'secondary', 'constitutional', 'symptoms', 'grade', '2', ',', 'anaemia', 'grade', '3', 'and', 'emesis', 'grade', '1', ',', 'maintaining', 'her', 'general', 'condition', '.', '\\n', 'At', 'the', 'end', 'of', 'treatment', ',', 'a', 'new', 're', '-', 'evaluation', 'was', 'performed', 'with', 'a', 'CT', 'scan', 'showing', 'progression', 'of', 'the', 'disease', 'with', 'a', 'large', 'mass', 'in', 'the', 'surgical', 'site', 'measuring', '19x10x5', 'cm', ',', 'which', 'had', 'grown', 'with', 'respect', 'to', 'the', 'previous', 'CT', 'scan', ',', 'and', 'persistent', 'metastases', 'in', 'the', 'rest', 'of', 'the', 'previous', 'sites', '.', 'The', 'patient', \"'s\", 'general', 'condition', 'worsened', ',', 'with', 'the', 'appearance', 'of', 'abdominal', 'and', 'lumbar', 'pain', ',', 'and', 'on', 'physical', 'examination', 'a', '5', 'cm', 'epigastric', 'mass', 'was', 'palpated', ',', 'corresponding', 'to', 'the', 'underlying', 'mass', '.', '\\n', 'Given', 'this', 'new', 'progression', ',', 'it', 'is', 'considered', 'that', 'the', 'tumour', 'is', 'resistant', 'to', 'chemotherapy', 'or', 'immunotherapy', 'schemes', 'against', 'renal', 'carcinoma', 'and', 'it', 'is', 'proposed', 'to', 'start', 'palliative', 'treatment', 'with', 'liposomal', 'Adriamycin', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'The', 'patient', 'accepted', 'the', 'proposal', 'and', 'received', 'a', 'first', 'cycle', '.', 'However', ',', 'a', 'week', 'later', 'she', 'went', 'to', 'the', 'emergency', 'department', 'for', 'hypovolemic', 'shock', 'with', 'metabolic', 'acidosis', 'and', 'pre', '-', 'renal', 'renal', 'failure', 'secondary', 'to', 'hyperemesis', 'of', '4', 'days', \"'\", 'evolution', 'and', 'grade', '4', 'anaemia', '.', 'The', 'patient', 'recovered', 'from', 'this', 'episode', 'but', 'a', 'week', 'later', 'she', 'began', 'with', 'faecal', 'vomiting', 'of', 'probable', 'obstructive', 'origin', 'due', 'to', 'compression', 'of', 'the', 'retroperitoneal', 'mass', ',', 'causing', 'progressive', 'deterioration', 'of', 'the', 'patient', 'and', 'the', 'patient', 'died', 'of', 'multi', '-', 'organ', 'failure', '19', 'months', 'after', 'diagnosis', '.', '\\n\\n\\n']\n",
            "[['tumour'], ['arterial', 'hypertension'], ['venous', 'thrombosis'], ['Para', '-', 'aortic', 'adenopathies'], ['renal', 'tumour'], ['hyperuricaemia'], ['anaemia'], ['bone', 'involvement'], ['renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['Leiomyosarcoma'], ['leiomyosarcoma'], ['renal', 'tumour'], ['tumour', 'infiltration'], ['tumour', 'infiltration'], ['sarcoma'], ['sarcoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma', 'metastases'], ['renal', 'collision', 'tumour'], ['tumour'], ['papillary', 'type', 'renal', 'cancer'], ['renal', 'leiomyosarcoma'], ['metastases'], ['bone', 'metastases'], ['leiomyosarcoma'], ['tumour'], ['sarcomatous', 'component', 'of', 'the', 'tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['bilateral', 'pulmonary', 'metastases'], ['carcinoma'], ['carcinoma'], ['pulmonary', 'metastases'], ['pulmonary', 'metastases'], ['pulmonary', 'thromboembolism'], ['metastases'], ['metastases'], ['anaemia'], ['pulmonary', 'metastases'], ['anaemia'], ['emesis'], ['metastases'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['anaemia'], ['hypovolemic', 'shock'], ['metabolic', 'acidosis'], ['hyperemesis'], ['multi', '-', 'organ', 'failure'], ['carcinoma'], ['sarcomatous', 'component', 'of', 'the', 'tumour']]\n",
            "['sarcoma']\n",
            "sarcoma\n",
            "231\n",
            "['Patient', 'aged', '53', 'years', 'at', 'the', 'time', 'of', 'diagnosis', 'with', 'a', 'personal', 'history', 'of', 'a', 'caesarean', 'section', ',', 'arterial', 'hypertension', 'and', 'tachycardia', 'treated', 'with', 'Atenolol', ',', 'who', 'came', 'to', 'the', 'Emergency', 'Department', 'with', 'a', '5', '-', 'month', 'history', 'of', 'progressive', 'dull', 'pain', 'in', 'the', 'left', 'flank', 'and', 'microhaematuria', '.', 'Ultrasound', 'and', 'then', 'abdominal', '-', 'pelvic', 'computerised', 'axial', 'tomography', '(', 'CAT', ')', 'scans', 'revealed', 'a', 'large', 'solid', 'mass', 'measuring', '20x16x13', 'cm', 'arising', 'from', 'the', 'left', 'renal', 'pole', ',', 'without', 'associated', 'venous', 'thrombosis', '.', 'Para', '-', 'aortic', 'adenopathies', 'were', 'also', 'observed', '.', 'All', 'this', 'was', 'compatible', 'with', 'a', 'left', 'renal', 'tumour', '.', '\\n\\n', 'In', 'view', 'of', 'this', 'finding', ',', 'an', 'extension', 'study', 'was', 'carried', 'out', 'with', 'a', 'general', 'analysis', 'showing', 'mild', 'iron', '-', 'deficiency', 'microcytic', 'anaemia', 'and', 'hyperuricaemia', ',', 'a', 'normal', 'chest', 'X', '-', 'ray', ',', 'a', 'bone', 'scan', 'showing', 'an', 'image', 'with', 'a', 'slight', 'increase', 'in', 'tracer', 'uptake', 'corresponding', 'to', 'soft', 'tissue', ',', 'with', 'a', 'rounded', 'morphology', ',', 'located', 'in', 'the', 'hypochondrium', 'and', 'in', 'the', 'hypochondrium', ',', 'located', 'in', 'the', 'left', 'hypochondrium', 'and', 'void', ',', 'exceeding', 'the', 'midline', ',', 'and', '3', 'foci', 'of', 'tracer', 'uptake', ',', 'one', 'in', 'the', 'left', 'iliac', 'blade', ',', 'the', 'second', 'in', 'the', 'left', 'hemivertebrae', 'L4', 'and', 'L5', 'and', 'the', 'third', 'at', 'the', 'level', 'of', 'the', 'right', 'hemivertebra', 'L2', ',', 'which', 'could', 'correspond', 'to', 'bone', 'involvement', 'due', 'to', 'contiguity', 'or', 'be', 'metastatic', ',', 'and', 'an', 'abdominal', 'MRI', 'scan', 'which', 'confirms', 'the', 'findings', 'of', 'the', 'CT', 'scan', '.', '\\n', 'With', 'the', 'presumptive', 'diagnosis', 'of', 'renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy', ',', 'the', 'patient', 'underwent', 'radical', 'left', 'nephrectomy', ',', 'removal', 'of', 'all', 'the', 'pararenal', 'fat', 'and', 'the', 'left', 'adrenal', 'gland', 'and', 'para', '-', 'aortic', 'and', 'interaortocaval', 'lymphadenectomy', 'from', 'the', 'renal', 'artery', 'to', '3', 'cm', 'from', 'the', 'common', 'iliac', 'artery', '.', 'Removal', 'of', 'the', 'mass', 'was', 'difficult', 'due', 'to', 'infiltration', 'of', 'the', 'transverse', 'mesocolon', 'and', 'tail', 'of', 'the', 'pancreas', ',', 'which', 'were', 'released', ',', 'leaving', 'the', 'mesocolon', 'untouched', '.', 'The', 'anatomo', '-', 'pathological', '(', 'A.P.', ')', 'result', 'was', 'as', 'follows', ':', 'Collision', 'renal', 'tumour', '(', 'Leiomyosarcoma', '(', '21', 'x', '15', 'cm', ')', 'and', 'renal', 'carcinoma', 'papillary', 'type', 'nuclear', 'grade', '3', '(', '7', 'x', '3.5', 'cm', ')', ')', '.', 'The', 'weight', 'of', 'the', 'whole', 'specimen', 'was', '2539', 'grams', '.', 'The', 'tumour', 'was', 'in', 'contact', 'with', 'the', 'surgical', 'edge', 'in', 'most', 'areas', '.', 'The', 'renal', 'parenchyma', 'was', 'microscopically', 'respected', 'and', 'no', 'tumour', 'infiltration', 'was', 'observed', '.', 'The', 'ureteral', 'fragment', 'and', 'renal', 'hilum', 'were', 'free', 'of', 'tumour', 'infiltration', '.', 'The', 'immunophenotypic', 'profile', 'of', 'the', 'tumour', 'was', 'as', 'follows', ':', 'Actin', ',', 'Desmin', ',', 'S-100', ',', 'Synaptofusin', 'and', 'CD', '56', 'and', 'c', '-', 'kit', 'negative', ';', 'Smooth', 'muscle', 'actin', 'positive', 'in', 'the', 'sarcomatous', 'zone', 'and', 'keratin', 'cocktail', '(', 'E1', ',', 'E3', ')', 'positive', 'in', 'the', 'carcinomatous', 'zone', '.', 'At', 'the', 'level', 'of', 'the', 'para', '-', 'aortic', 'chain', ',', '16', 'adenopathies', 'were', 'isolated', ',', 'the', 'largest', 'measuring', '2.5', 'cm', ',', 'with', 'metastases', 'in', '14', 'of', 'them', ',', '13', 'from', 'the', 'carcinoma', 'and', '1', 'with', 'mixed', 'metastases', '(', 'sarcoma+carcinoma', ')', '.', 'Six', 'adenopathies', 'were', 'isolated', 'in', 'the', 'interaortocaval', 'chain', ',', 'the', 'largest', 'measuring', '1.4', 'cm', ',', 'three', 'of', 'which', 'were', 'carcinoma', 'metastases', '.', 'The', 'mesocolic', 'bed', 'was', 'infiltrated', 'by', 'leiomyosarcoma', '.', 'In', 'the', 'perisuprarenal', 'adipose', 'tissue', '4', 'adenopathies', 'were', 'isolated', ',', '3', 'of', 'them', 'with', 'metastasis', 'of', 'the', 'carcinoma', 'and', 'another', 'with', 'mixed', 'metastasis', '(', 'carcinoma+sarcoma', ')', '.', 'The', 'left', 'adrenal', 'gland', ',', 'the', 'perirenal', 'fat', 'and', 'the', 'gall', 'bladder', 'showed', 'no', 'tumour', 'elements', '.', '\\n\\n', 'We', 'were', 'therefore', 'faced', 'with', 'a', 'renal', 'collision', 'tumour', 'consisting', 'of', 'a', 'stage', 'IV', 'papillary', 'type', 'renal', 'cancer', '(', 'pT3', '-', '4pN2', ')', 'according', 'to', 'the', 'TNM', 'classification', 'and', 'a', 'stage', 'IV', 'renal', 'leiomyosarcoma', '(', 'pT2bpN1', ')', 'according', 'to', 'the', 'AJCC', 'classification', ',', 'not', 'radically', 'resected', 'and', 'with', 'possible', 'bone', 'metastases', 'according', 'to', 'bone', 'scintigraphy', '.', '\\n', 'The', 'postoperative', 'period', 'was', 'uneventful', 'and', 'the', 'patient', 'was', 'referred', 'to', 'the', 'Medical', 'Oncology', 'Department', '.', 'It', 'was', 'decided', 'to', 'propose', 'complementary', 'chemotherapy', 'treatment', 'with', 'Ifosfamide', '5', 'g', '/', 'm2', 'in', 'a', 'continuous', 'infusion', 'of', '24h', 'x', '1', 'day', '+', 'Adriamycin', '60', 'mg', '/', 'm2', 'x', '1', 'day/21', 'days', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'A', 'CT', 'scan', 'was', 'previously', 'requested', 'in', 'which', 'a', 'small', 'soft', 'tissue', 'enlargement', 'was', 'observed', 'behind', 'the', 'pancreatic', 'tail', 'and', 'renal', 'bed', ',', 'which', 'could', 'be', 'compatible', 'with', 'present', 'disease', '.', '\\n', 'The', 'patient', 'started', 'treatment', 'according', 'to', 'the', 'planned', 'schedule', '3', 'weeks', 'after', 'surgery', '.', 'She', 'received', 'a', 'total', 'of', '6', 'cycles', 'with', 'good', 'clinical', 'tolerance', '.', 'After', 'the', '4th', 'cycle', ',', 'an', 'abdominal', 'CAT', 'scan', 'was', 'performed', ',', 'which', 'was', 'normal', ',', 'and', 'at', 'the', 'end', 'of', 'the', '6th', 'cycle', ',', 'a', 'bone', 'scan', 'was', 'performed', ',', 'which', 'showed', 'no', 'pathological', 'findings', '.', 'The', 'patient', 'underwent', 'regular', 'check', '-', 'ups', 'and', '3', 'months', 'later', 'a', 'chest', 'X', '-', 'ray', 'was', 'performed', 'showing', 'images', 'suggestive', 'of', 'bilateral', 'pulmonary', 'metastases', ',', 'which', 'were', 'confirmed', 'by', 'a', 'CT', 'scan', 'showing', 'multiple', 'bilateral', 'millimetric', 'pulmonary', 'metastases', '.', 'In', 'order', 'to', 'identify', 'the', 'origin', 'of', 'these', 'metastases', ',', 'Thoracic', 'Surgery', 'was', 'consulted', 'and', 'it', 'was', 'decided', 'to', 'perform', 'a', 'left', 'videothoracoscopy', 'with', 'biopsies', '.', 'The', 'P.A.', 'diagnosis', 'was', 'metastasis', 'of', 'poorly', 'differentiated', 'carcinoma', 'with', 'an', 'epithelial', 'component', '(', 'renal', ')', '.', 'In', 'view', 'of', 'this', 'diagnosis', ',', '6', 'months', 'after', 'finishing', 'the', 'first', 'chemotherapy', 'regimen', ',', 'it', 'was', 'decided', 'to', 'start', 'a', 'second', 'line', 'of', 'treatment', 'with', 'a', 'chemotherapy', 'regimen', 'with', 'Gemzitabine', 'and', 'Fluoropyrimidines9', 'that', 'had', 'proved', 'useful', 'in', 'stage', 'IV', 'renal', 'carcinoma', ':', 'Gemcitabine', '1000', 'mg', '/', 'm2', 'days', '1', 'and', '8', '+', 'Capecitabine', '1000', 'mg', '/', 'm2/12h', 'days', '1', '-', '14/21', 'days', ',', 'which', 'the', 'patient', 'accepted', '.', '\\n\\n', 'Treatment', 'was', 'started', 'with', 'a', '20', '%', 'dose', 'reduction', 'which', 'was', 'maintained', 'for', 'the', 'rest', 'of', 'the', 'treatment', 'given', 'the', 'patient', \"'s\", 'general', 'condition', '(', 'ECOG', ':', '1', '-', '2', ')', '.', 'After', 'the', 'second', 'cycle', ',', 'the', 'patient', 'suffered', 'a', 'complication', 'of', 'pulmonary', 'thromboembolism', 'from', 'which', 'she', 'recovered', 'but', 'which', 'caused', 'a', 'delay', 'of', '4', 'weeks', 'in', 'the', 'administration', 'of', 'the', 'third', 'cycle', '.', 'After', '6', 'cycles', 'of', 'treatment', ',', 'which', 'she', 'received', 'with', 'acceptable', 'tolerance', 'except', 'for', 'grade', '4', 'anaemia', ',', 'she', 'was', 're', '-', 'evaluated', 'with', 'a', 'body', 'CT', 'scan', 'which', 'showed', 'persistent', 'pulmonary', 'metastases', 'with', 'the', 'appearance', 'of', 'liver', 'and', 'spleen', 'metastases', 'and', 'local', 'relapse', '.', '\\n', 'In', 'view', 'of', 'this', 'progression', ',', 'treatment', 'with', 'IL-2', 'was', 'proposed', 'for', '6', 'weeks', '(', '1', 'week', 'of', 'induction', 'with', '18', 'Million', 'Units', '(', 'MU', ')', 'x', '5', 'days', 'and', '5', 'weeks', ':', '9', 'MU', 'days', '1', 'and', '2', 'and', '18', 'MU', 'days', '3', 'to', '5)10', '.', 'The', 'patient', 'accepted', 'the', 'treatment', 'with', 'moderate', 'toxicity', 'with', 'secondary', 'constitutional', 'symptoms', 'grade', '2', ',', 'anaemia', 'grade', '3', 'and', 'emesis', 'grade', '1', ',', 'maintaining', 'her', 'general', 'condition', '.', '\\n', 'At', 'the', 'end', 'of', 'treatment', ',', 'a', 'new', 're', '-', 'evaluation', 'was', 'performed', 'with', 'a', 'CT', 'scan', 'showing', 'progression', 'of', 'the', 'disease', 'with', 'a', 'large', 'mass', 'in', 'the', 'surgical', 'site', 'measuring', '19x10x5', 'cm', ',', 'which', 'had', 'grown', 'with', 'respect', 'to', 'the', 'previous', 'CT', 'scan', ',', 'and', 'persistent', 'metastases', 'in', 'the', 'rest', 'of', 'the', 'previous', 'sites', '.', 'The', 'patient', \"'s\", 'general', 'condition', 'worsened', ',', 'with', 'the', 'appearance', 'of', 'abdominal', 'and', 'lumbar', 'pain', ',', 'and', 'on', 'physical', 'examination', 'a', '5', 'cm', 'epigastric', 'mass', 'was', 'palpated', ',', 'corresponding', 'to', 'the', 'underlying', 'mass', '.', '\\n', 'Given', 'this', 'new', 'progression', ',', 'it', 'is', 'considered', 'that', 'the', 'tumour', 'is', 'resistant', 'to', 'chemotherapy', 'or', 'immunotherapy', 'schemes', 'against', 'renal', 'carcinoma', 'and', 'it', 'is', 'proposed', 'to', 'start', 'palliative', 'treatment', 'with', 'liposomal', 'Adriamycin', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'The', 'patient', 'accepted', 'the', 'proposal', 'and', 'received', 'a', 'first', 'cycle', '.', 'However', ',', 'a', 'week', 'later', 'she', 'went', 'to', 'the', 'emergency', 'department', 'for', 'hypovolemic', 'shock', 'with', 'metabolic', 'acidosis', 'and', 'pre', '-', 'renal', 'renal', 'failure', 'secondary', 'to', 'hyperemesis', 'of', '4', 'days', \"'\", 'evolution', 'and', 'grade', '4', 'anaemia', '.', 'The', 'patient', 'recovered', 'from', 'this', 'episode', 'but', 'a', 'week', 'later', 'she', 'began', 'with', 'faecal', 'vomiting', 'of', 'probable', 'obstructive', 'origin', 'due', 'to', 'compression', 'of', 'the', 'retroperitoneal', 'mass', ',', 'causing', 'progressive', 'deterioration', 'of', 'the', 'patient', 'and', 'the', 'patient', 'died', 'of', 'multi', '-', 'organ', 'failure', '19', 'months', 'after', 'diagnosis', '.', '\\n\\n\\n']\n",
            "[['tumour'], ['arterial', 'hypertension'], ['venous', 'thrombosis'], ['Para', '-', 'aortic', 'adenopathies'], ['renal', 'tumour'], ['hyperuricaemia'], ['anaemia'], ['bone', 'involvement'], ['renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['Leiomyosarcoma'], ['leiomyosarcoma'], ['renal', 'tumour'], ['tumour', 'infiltration'], ['tumour', 'infiltration'], ['sarcoma'], ['sarcoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma', 'metastases'], ['renal', 'collision', 'tumour'], ['tumour'], ['papillary', 'type', 'renal', 'cancer'], ['renal', 'leiomyosarcoma'], ['metastases'], ['bone', 'metastases'], ['leiomyosarcoma'], ['tumour'], ['sarcomatous', 'component', 'of', 'the', 'tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['bilateral', 'pulmonary', 'metastases'], ['carcinoma'], ['carcinoma'], ['pulmonary', 'metastases'], ['pulmonary', 'metastases'], ['pulmonary', 'thromboembolism'], ['metastases'], ['metastases'], ['anaemia'], ['pulmonary', 'metastases'], ['anaemia'], ['emesis'], ['metastases'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['anaemia'], ['hypovolemic', 'shock'], ['metabolic', 'acidosis'], ['hyperemesis'], ['multi', '-', 'organ', 'failure'], ['carcinoma'], ['sarcomatous', 'component', 'of', 'the', 'tumour']]\n",
            "['sarcoma']\n",
            "sarcoma\n",
            "231\n",
            "['Male', 'patient', ',', 'black', ',', '21', 'years', 'of', 'age', ',', 'who', 'consulted', 'the', 'Maxillofacial', 'Surgery', 'Department', 'of', 'the', 'Hospital', 'Universitario', 'San', 'Vicente', 'Fundación', 'de', 'Medellín', 'attached', 'to', 'the', 'Universidad', 'de', 'Antioquia', ',', 'referred', 'by', 'a', 'dentist', 'from', 'a', 'public', 'health', 'institution', 'for', 'presenting', 'an', 'asymptomatic', 'increase', 'in', 'the', 'volume', 'of', 'the', 'mandible', ',', 'with', 'unknown', 'evolution', '.', '\\n', 'Intraorally', ',', 'excellent', 'dental', 'integrity', 'and', 'good', 'dental', 'occlusion', 'were', 'observed', ',', 'but', 'there', 'was', 'an', 'increase', 'in', 'volume', 'in', 'the', 'vestibular', 'region', 'involving', 'the', 'body', 'and', 'the', 'symphysis', ',', 'while', 'lingually', ',', 'the', 'expansion', 'of', 'the', 'table', 'was', 'only', 'in', 'the', 'area', 'of', 'the', 'lower', 'right', 'canine', 'and', 'bicuspids', '.', '\\n', 'The', 'initial', 'panoramic', 'X', '-', 'ray', 'showed', 'a', 'radiolucent', 'image', '10', 'cm', 'long', ',', 'multiloculated', ',', 'located', 'from', 'distal', 'tooth', '46', 'to', 'mesial', 'tooth', '33', '.', 'The', 'mesial', 'root', 'of', 'tooth', '46', 'and', 'the', 'root', 'of', 'tooth', '45', 'showed', 'rhizolysis', ',', 'with', 'possible', 'pulp', 'necrosis', '.', 'Teeth', '46', ',', '45', 'and', '44', 'had', 'minimal', 'mobility', ';', 'the', 'other', 'teeth', 'had', 'normal', 'vitality', 'and', 'the', 'lower', 'dental', 'canal', 'was', 'rejected', '.', 'There', 'is', 'no', 'alteration', 'in', 'the', 'sensitivity', 'of', 'the', 'mentonian', 'nerve', '.', '\\n\\n', 'The', 'patient', 'consented', 'to', 'the', 'entire', 'treatment', 'by', 'signing', 'the', 'informed', 'consent', 'form', '.', 'Prior', 'to', 'the', 'initial', 'biopsy', 'an', 'aspirate', 'was', 'taken', 'producing', 'a', 'citrine', 'fluid', 'and', 'the', 'first', 'histopathological', 'study', 'found', 'a', 'lax', 'connective', 'tissue', ',', 'some', 'multinucleated', 'giant', 'cells', 'with', 'few', 'nuclei', 'and', 'a', 'thin', 'band', 'of', 'keratin', ';', 'there', 'was', 'no', 'evidence', 'of', 'epithelial', 'tissue', 'but', 'it', 'was', 'considered', 'as', 'insufficient', 'sample', '.', 'Nevertheless', ',', 'a', 'diagnosis', 'of', 'keratocystic', 'odontogenic', 'tumour', 'was', 'made', ',', 'due', 'to', 'the', 'presence', 'of', 'a', 'keratin', 'band', '.', 'After', 'this', 'procedure', ',', 'the', 'lesion', 'became', 'superinfected', 'and', 'the', 'patient', 'had', 'to', 'be', 'hospitalised', 'due', 'to', 'the', 'severity', 'of', 'the', 'clinical', 'picture', ';', 'this', 'condition', 'was', 'used', 'to', 'perform', 'a', 'second', 'biopsy', 'under', 'general', 'anaesthesia', '8', 'days', 'later', ',', 'in', 'which', 'a', 'capsule', 'made', 'up', 'of', 'connective', 'tissue', 'with', 'an', 'intense', 'inflammatory', 'infiltrate', 'and', 'epithelial', 'tissue', 'with', 'several', 'layers', 'of', 'cells', 'was', 'found', ',', 'which', 'gave', 'the', 'diagnosis', ':', 'inflammatory', 'root', 'cyst', '.', 'Given', 'the', 'ambiguity', 'of', 'the', 'diagnosis', 'and', 'the', 'aggressiveness', 'of', 'the', 'lesion', ',', 'it', 'was', 'decided', 'to', 'perform', 'the', 'intervention', 'with', 'the', 'first', 'diagnosis', '(', 'keratocystic', 'odontogenic', 'tumour', '-exkeratocyst-', ')', '.', 'With', 'this', 'diagnosis', 'it', 'was', 'decided', 'to', 'plan', 'an', 'aggressive', 'surgical', 'treatment', 'and', 'it', 'was', 'decided', 'to', 'order', 'endodontic', 'treatment', 'from', '46', 'to', '33', '(', '9', 'teeth', ')', 'prior', 'to', 'the', 'surgical', 'procedure', ',', 'as', 'the', 'dental', 'apices', 'were', 'immersed', 'in', 'the', 'cavity', 'and', 'the', 'curettage', 'itself', 'would', 'cause', 'amputation', 'of', 'the', 'pulp', 'vascular', 'bundles', ',', 'in', 'addition', ',', 'according', 'to', 'the', 'diagnosis', ',', 'it', 'is', 'a', 'very', 'recurrent', 'lesion', '.', 'This', 'endodontic', 'treatment', 'lasted', '3', 'months', 'due', 'to', 'the', 'difficulty', 'of', 'sealing', 'the', 'canals', 'due', 'to', 'the', 'presence', 'of', 'an', 'amber', '-', 'coloured', 'liquid', 'draining', 'through', 'the', 'pulp', 'chambers', '.', 'After', 'this', 'time', ',', 'the', 'production', 'of', 'liquid', 'content', 'ceased', 'and', 'the', 'endodontic', 'treatment', 'was', 'completed', '.', '\\n\\n', 'The', 'surgical', 'phase', 'was', 'performed', 'under', 'general', 'anaesthesia', '.', 'A', 'trapezoidal', 'flap', 'was', 'raised', 'from', 'distal', '46', 'to', 'distal', '33', '.', 'When', 'the', 'flap', 'was', 'reflected', ',', 'it', 'was', 'found', 'to', 'be', 'expanded', 'in', 'its', 'entire', 'length', 'and', 'perforated', 'in', 'the', 'bicuspid', 'area', '(', 'teeth', '44', 'and', '45', ')', ',', 'where', 'the', 'biopsies', 'had', 'previously', 'been', 'taken', '.', 'We', 'proceeded', 'to', 'remove', 'all', 'the', 'expanded', 'vestibular', 'cortex', 'until', 'we', 'had', 'complete', 'access', 'to', 'the', 'cystic', 'cavity', '.', 'The', 'thick', 'fibrous', 'capsule', 'covering', 'the', 'osseous', 'defect', 'was', 'found', 'and', 'removed', '.', 'The', 'bone', 'cavity', 'is', 'reamed', 'with', 'rotary', 'cutting', 'instruments', 'and', 'the', 'entire', 'bone', 'defect', 'is', 'brushed', 'with', 'Carnoy', \"'s\", 'solution', '.', 'Some', 'perforations', 'of', 'the', 'lingual', 'cortex', 'are', 'visible', ',', 'which', 'are', 'cauterised', 'with', 'an', 'electroscalpel', 'due', 'to', 'the', 'risk', 'of', 'invasion', 'of', 'the', 'tumour', 'lesion', 'into', 'the', 'lingual', 'soft', 'tissues', '.', 'Due', 'to', 'the', 'weakening', 'of', 'the', 'mandibular', 'basilar', 'border', ',', 'a', 'reconstruction', 'plate', 'is', 'placed', 'to', 'avoid', 'intra-', 'or', 'postoperative', 'fracture', '.', 'Before', 'suturing', 'the', 'flap', ',', 'the', 'bone', 'defect', 'is', 'filled', 'with', 'fibrillar', 'collagen', 'and', 'medicated', 'with', 'analgesics', 'and', 'antibiotics', '.', 'The', 'tissue', 'obtained', 'is', 'sent', 'to', 'pathology', '.', 'The', 'histopathological', 'report', 'of', 'the', 'surgical', 'specimen', 'shows', 'fibroconnective', 'tissue', 'devoid', 'of', 'epithelium', 'and', 'a', 'post', '-', 'surgical', 'diagnosis', 'of', 'an', 'aneurysmal', 'bone', 'cyst', 'was', 'obtained', '.', '\\n', 'Because', 'the', 'second', 'biopsy', 'showed', 'an', 'epithelium', ',', 'the', 'diagnosis', 'was', 'misleading', 'but', ',', 'evaluating', 'the', 'three', 'histopathological', 'samples', ',', 'it', 'was', 'considered', 'that', 'the', 'epithelial', 'tissue', 'of', 'the', 'second', 'biopsy', 'was', 'the', 'product', 'of', 'the', 'inflammatory', 'reaction', ',', 'while', 'the', 'initial', 'biopsy', 'and', 'the', 'surgical', 'specimen', 'showed', 'no', 'epithelial', 'component', ';', 'it', 'was', 'decided', 'to', 'consider', 'it', 'an', 'aneurysmal', 'bone', 'cyst', '.', '\\n', 'The', 'patient', 'was', 'evaluated', 'at', '8', 'and', '15', 'days', 'and', 'then', 'at', '2', 'months', ',', '10', 'months', 'and', '2', 'years', '.', 'At', 'the', 'follow', '-', 'up', 'appointment', 'at', '2', 'months', 'pulp', 'necrosis', 'and', 'fistula', 'were', 'found', 'at', 'the', 'level', 'of', '47', ',', 'which', 'was', 'adjacent', 'to', 'the', 'lesion', 'and', 'the', 'mesial', 'root', 'apex', 'had', 'been', 'amputated', 'during', 'the', 'surgical', 'procedure', '.', 'Endodontics', 'was', 'performed', 'and', 'the', 'infection', 'resolved', '.', 'He', 'also', 'presented', 'with', 'right', 'mentonian', 'nerve', 'paraesthesia', 'and', 'was', 'prescribed', 'B', '-', 'complex', 'tablets', 'for', 'one', 'month', '.', 'A', 'new', 'post', '-', 'surgical', 'assessment', 'was', 'carried', 'out', '10', 'months', 'later', 'and', 'adequate', 'bone', 'healing', 'was', 'found', ',', 'but', 'there', 'was', 'an', 'occlusal', 'sequela', ',', 'as', 'the', 'teeth', 'that', 'were', 'left', 'without', 'bone', 'support', 'due', 'to', 'being', 'immersed', 'in', 'the', 'bone', 'defect', '(', 'teeth', '44', 'to', '33', ')', 'were', 'intruded', 'and', 'produced', 'an', 'open', 'bite', '.', 'Two', 'years', 'later', 'a', 'new', 'clinical', 'and', 'radiographic', 'check', '-', 'up', 'was', 'carried', 'out', 'where', 'it', 'was', 'found', 'that', ':', 'teeth', '44', 'to', '33', 'remained', 'in', 'open', 'bite', ',', 'tooth', '44', 'was', 'also', 'slightly', 'vestibularised', '.', 'The', 'panoramic', 'radiograph', 'shows', 'good', 'bone', 'filling', ',', 'but', 'the', 'three', '-', 'dimensional', 'tomography', 'shows', 'that', 'there', 'is', 'a', 'defect', 'of', 'about', '6', 'mm', 'in', 'diameter', 'that', 'compromises', 'the', 'apex', 'of', '44', 'and', '43', ',', 'and', 'that', 'the', 'bone', 'has', 'yet', 'to', 'regenerate', ',', 'and', 'the', 'paraesthesia', 'still', 'persists', 'two', 'years', 'later', '.', 'The', 'intraoral', 'image', 'shows', 'that', 'the', 'enlargement', 'produced', 'by', 'the', 'expansion', 'of', 'the', 'lesion', 'of', 'the', 'lingual', 'plate', 'at', 'the', 'level', 'of', 'teeth', '43', ',', '44', 'and', '45', 'still', 'persists', '.', '\\n\\n\\n']\n",
            "[['pulp', 'necrosis'], ['keratocystic', 'odontogenic', 'tumour'], ['keratocystic', 'odontogenic', 'tumour'], ['inflammatory', 'root', 'cyst'], ['exkeratocyst'], ['lesion', 'became', 'superinfected'], ['lesion'], ['lesion'], ['lesion'], ['aneurysmal', 'bone', 'cyst'], ['tumour', 'lesion'], ['bone', 'defect'], ['bone', 'defect'], ['cystic', 'cavity'], ['lesion'], ['perforations', 'of', 'the', 'lingual', 'cortex'], ['aneurysmal', 'bone', 'cyst'], ['open', 'bite'], ['open', 'bite'], ['infection'], ['bone', 'defect'], ['pulp', 'necrosis'], ['lesion'], ['lesion']]\n",
            "['exkeratocyst']\n",
            "exkeratocyst\n",
            "638\n"
          ]
        }
      ],
      "source": [
        "labels_tokenized = []\n",
        "idx =-1\n",
        "for hct, et in zip(HCs_tokenized, Ent_tokenized):\n",
        "    idx+=1\n",
        "    labels = []\n",
        "    for i in range(len(hct)):\n",
        "        #Labels: 0->'O'; 1->'B'; 2->'I'\n",
        "        #labels.append('O')\n",
        "        labels.append(0)\n",
        "\n",
        "    #For Entities (Diseases|Enfermedades)\n",
        "    for enf in et:\n",
        "      first = True\n",
        "      for e in enf:\n",
        "          if first == True:\n",
        "              try:\n",
        "                #labels[hct.index(e)] = 'B'\n",
        "                #labels[posLab] = 'B'\n",
        "                indices = find_idx(hct, e)\n",
        "                if len(indices) > 1:\n",
        "                  for id in indices:\n",
        "                      labels[id] = 1\n",
        "                else:\n",
        "                  labels[hct.index(e)] = 1\n",
        "                \n",
        "                first = False\n",
        "              except:\n",
        "                first = False\n",
        "                if e == \"sarcoma+carcinoma\" or e == \"carcinoma+sarcoma\":\n",
        "                  continue\n",
        "                print(hct)\n",
        "                print(et)\n",
        "                print(enf)\n",
        "                print(e)\n",
        "                print(idx)\n",
        "          else:\n",
        "              try:\n",
        "                #labels[hct.index(e)] = 'I'\n",
        "                #labels[posLab] = 'I'\n",
        "                indices = find_idx(hct, e)\n",
        "                if len(indices) > 1:\n",
        "                  for id in indices:\n",
        "                      if labels[id-1] != 0:\n",
        "                        labels[id] = 2\n",
        "                else:\n",
        "                  labels[hct.index(e)] = 2\n",
        "              except:\n",
        "                if e == \"sarcoma+carcinoma\" or e == \"carcinoma+sarcoma\":\n",
        "                  continue\n",
        "                print(hct)\n",
        "                print(et)\n",
        "                print(enf)\n",
        "                print(e)\n",
        "                print(idx)\n",
        "\n",
        "    labels_tokenized.append(labels)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 21,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jXKczS_fKpLc",
        "outputId": "d385f71c-5507-4a33-8cce-f5d2195354f9"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "A\t0\n",
            "73\t0\n",
            "-\t0\n",
            "year\t0\n",
            "-\t0\n",
            "old\t0\n",
            "patient\t0\n",
            "with\t0\n",
            "a\t0\n",
            "history\t0\n",
            "of\t0\n",
            "arterial\t1\n",
            "hypertension\t2\n",
            "and\t0\n",
            "polyarthrosis\t1\n",
            "presented\t0\n",
            "to\t0\n",
            "the\t0\n",
            "emergency\t0\n",
            "department\t0\n",
            "with\t0\n",
            "abdominal\t0\n",
            "distension\t0\n",
            "and\t0\n",
            "pain\t0\n",
            "associated\t0\n",
            "with\t0\n",
            "constipation\t0\n",
            "and\t0\n",
            "febrile\t0\n",
            "fever\t0\n",
            ".\t0\n",
            "The\t0\n",
            "symptoms\t0\n",
            "had\t0\n",
            "started\t0\n",
            "three\t0\n",
            "weeks\t0\n",
            "earlier\t0\n",
            "and\t0\n",
            "worsened\t0\n",
            "during\t0\n",
            "the\t0\n",
            "four\t0\n",
            "days\t0\n",
            "prior\t0\n",
            "to\t0\n",
            "admission\t0\n",
            ".\t0\n",
            "During\t0\n",
            "this\t0\n",
            "period\t0\n",
            ",\t0\n",
            "an\t0\n",
            "upper\t0\n",
            "gastrointestinal\t0\n",
            "fibroendoscopy\t0\n",
            "(\t0\n",
            "oesophagus\t0\n",
            ",\t0\n",
            "stomach\t0\n",
            "and\t0\n",
            "duodenum\t0\n",
            ")\t0\n",
            "and\t0\n",
            "a\t0\n",
            "colonoscopy\t0\n",
            "(\t0\n",
            "up\t0\n",
            "to\t0\n",
            "the\t0\n",
            "splenic\t0\n",
            "angle\t0\n",
            ")\t0\n",
            "were\t0\n",
            "performed\t0\n",
            ",\t0\n",
            "but\t0\n",
            "no\t0\n",
            "abnormalities\t0\n",
            "were\t0\n",
            "found\t0\n",
            ".\t0\n",
            "\n",
            "\t0\n",
            "Physical\t0\n",
            "examination\t0\n",
            "revealed\t0\n",
            "a\t0\n",
            "low\t0\n",
            "-\t0\n",
            "grade\t0\n",
            "fever\t0\n",
            "(\t0\n",
            "37.6º\t0\n",
            "C\t0\n",
            ")\t0\n",
            ",\t0\n",
            "a\t0\n",
            "distended\t0\n",
            "abdomen\t0\n",
            ",\t0\n",
            "diffusely\t0\n",
            "painful\t0\n",
            "on\t0\n",
            "palpation\t0\n",
            ",\t0\n",
            "tympanised\t0\n",
            "on\t0\n",
            "percussion\t0\n",
            ",\t0\n",
            "with\t0\n",
            "scant\t0\n",
            "borborygmi\t0\n",
            "but\t0\n",
            "no\t0\n",
            "evidence\t0\n",
            "of\t0\n",
            "peritonism\t0\n",
            ",\t0\n",
            "pulmonary\t0\n",
            "auscultation\t0\n",
            "with\t0\n",
            "decreased\t0\n",
            "ventilation\t0\n",
            "in\t0\n",
            "the\t0\n",
            "lower\t1\n",
            "half\t0\n",
            "of\t0\n",
            "the\t0\n",
            "right\t0\n",
            "hemithorax\t0\n",
            "and\t0\n",
            "the\t0\n",
            "onset\t0\n",
            "of\t0\n",
            "intense\t0\n",
            "pain\t0\n",
            "on\t0\n",
            "palpation\t0\n",
            "and\t0\n",
            "percussion\t0\n",
            "of\t0\n",
            "the\t0\n",
            "last\t0\n",
            "three\t0\n",
            "dorsal\t0\n",
            "spinous\t0\n",
            "processes\t0\n",
            ".\t0\n",
            "\n",
            "\t0\n",
            "Analyses\t0\n",
            "showed\t0\n",
            "8.2\t0\n",
            "x\t0\n",
            "109\t0\n",
            "leukocytes\t0\n",
            "/\t0\n",
            "L\t0\n",
            ",\t0\n",
            "haemoglobin\t0\n",
            "136\t0\n",
            "g\t0\n",
            "/\t0\n",
            "L\t0\n",
            ",\t0\n",
            "platelets\t0\n",
            "186\t0\n",
            "x\t0\n",
            "109\t0\n",
            "/\t0\n",
            "L.\t0\n",
            "Except\t0\n",
            "for\t0\n",
            "glycaemia\t0\n",
            "(\t0\n",
            "123\t0\n",
            "mg\t0\n",
            "/\t0\n",
            "dl\t0\n",
            ")\t0\n",
            ",\t0\n",
            "the\t0\n",
            "following\t0\n",
            "laboratory\t0\n",
            "parameters\t0\n",
            "were\t0\n",
            "normal\t0\n",
            "or\t0\n",
            "negative\t0\n",
            ":\t0\n",
            "urea\t0\n",
            ",\t0\n",
            "creatinine\t0\n",
            ",\t0\n",
            "bilirubin\t0\n",
            ",\t0\n",
            "transaminases\t0\n",
            ",\t0\n",
            "gamma\t0\n",
            "-\t0\n",
            "glutamyltranspeptidase\t0\n",
            ",\t0\n",
            "sodium\t0\n",
            ",\t0\n",
            "potassium\t0\n",
            ",\t0\n",
            "chlorine\t0\n",
            ",\t0\n",
            "calcium\t0\n",
            ",\t0\n",
            "phosphorus\t0\n",
            ",\t0\n",
            "creatine\t0\n",
            "phosphokinase\t0\n",
            ",\t0\n",
            "amylase\t0\n",
            ",\t0\n",
            "lactate\t0\n",
            "dehydrogenase\t0\n",
            "(\t0\n",
            "LDH\t0\n",
            ")\t0\n",
            ",\t0\n",
            "proteinogram\t0\n",
            ",\t0\n",
            "immunoglobulin\t0\n",
            "dosage\t0\n",
            ",\t0\n",
            "alpha\t0\n",
            "-\t0\n",
            "fetoprotein\t0\n",
            ",\t0\n",
            "CA\t0\n",
            "19\t0\n",
            "antigens\t0\n",
            ".\t0\n",
            "9\t0\n",
            "and\t0\n",
            "CA\t0\n",
            "125\t0\n",
            "antigens\t0\n",
            ",\t0\n",
            "as\t0\n",
            "well\t0\n",
            "as\t0\n",
            "general\t0\n",
            "urinalysis\t0\n",
            ".\t0\n",
            "ESR\t0\n",
            "and\t0\n",
            "C\t0\n",
            "-\t0\n",
            "reactive\t0\n",
            "protein\t0\n",
            "were\t0\n",
            "elevated\t0\n",
            ",\t0\n",
            "with\t0\n",
            "values\t0\n",
            "of\t0\n",
            "85\t0\n",
            "mm\t0\n",
            "/\t0\n",
            "1\t0\n",
            "h\t0\n",
            "and\t0\n",
            "133\t0\n",
            "mg\t0\n",
            "/\t0\n",
            "L\t0\n",
            "(\t0\n",
            "normal\t0\n",
            "<\t0\n",
            "5\t0\n",
            "mg\t0\n",
            "/\t0\n",
            "L\t0\n",
            ")\t0\n",
            ",\t0\n",
            "respectively\t0\n",
            ".\t0\n",
            "Mantoux\t0\n",
            "intradermal\t0\n",
            "reaction\t0\n",
            "(\t0\n",
            "10\t0\n",
            "IU\t0\n",
            "RT-23\t0\n",
            ")\t0\n",
            "was\t0\n",
            "positive\t0\n",
            ",\t0\n",
            "with\t0\n",
            "an\t0\n",
            "induration\t0\n",
            "of\t0\n",
            "25\t0\n",
            "mm\t0\n",
            ".\t0\n",
            "Chest\t0\n",
            "X\t0\n",
            "-\t0\n",
            "ray\t0\n",
            "showed\t0\n",
            "an\t0\n",
            "image\t0\n",
            "compatible\t0\n",
            "with\t0\n",
            "right\t0\n",
            "lower\t1\n",
            "lobe\t2\n",
            "atelectasis\t2\n",
            "in\t0\n",
            "the\t0\n",
            "context\t0\n",
            "of\t0\n",
            "an\t0\n",
            "ipsilateral\t0\n",
            "pleural\t1\n",
            "effusion\t2\n",
            ".\t0\n",
            "There\t0\n",
            "were\t0\n",
            "no\t0\n",
            "signs\t0\n",
            "suggestive\t0\n",
            "of\t0\n",
            "adenopathy\t0\n",
            "or\t0\n",
            "alterations\t0\n",
            "in\t0\n",
            "the\t0\n",
            "cardiopericardial\t0\n",
            "silhouette\t0\n",
            ".\t0\n",
            "A\t0\n",
            "thoracoabdominal\t0\n",
            "CT\t0\n",
            "scan\t0\n",
            "confirmed\t0\n",
            "the\t0\n",
            "existence\t0\n",
            "of\t0\n",
            "a\t0\n",
            "right\t0\n",
            "pleural\t1\n",
            "effusion\t2\n",
            "and\t0\n",
            "identified\t0\n",
            "prominent\t0\n",
            "degenerative\t0\n",
            "changes\t0\n",
            "along\t0\n",
            "the\t0\n",
            "dorsolumbar\t0\n",
            "spine\t0\n",
            "but\t0\n",
            ",\t0\n",
            "above\t0\n",
            "all\t0\n",
            ",\t0\n",
            "erosions\t0\n",
            "in\t0\n",
            "the\t0\n",
            "vertebral\t0\n",
            "plates\t0\n",
            "adjacent\t0\n",
            "to\t0\n",
            "the\t0\n",
            "D10\t0\n",
            "-\t0\n",
            "D11\t0\n",
            "disc\t0\n",
            "space\t0\n",
            ".\t0\n",
            "A\t0\n",
            "lumbar\t0\n",
            "MRI\t0\n",
            "showed\t0\n",
            "hyposignal\t0\n",
            "on\t0\n",
            "T1\t0\n",
            "-\t0\n",
            "weighted\t0\n",
            "sequences\t0\n",
            "and\t0\n",
            "hypersignal\t0\n",
            "on\t0\n",
            "T2\t0\n",
            "-\t0\n",
            "weighted\t0\n",
            "sequences\t0\n",
            "in\t0\n",
            "these\t0\n",
            "vertebrae\t0\n",
            "and\t0\n",
            "their\t0\n",
            "corresponding\t0\n",
            "disc\t0\n",
            ",\t0\n",
            "with\t0\n",
            "morphological\t0\n",
            "alterations\t0\n",
            "typical\t0\n",
            "of\t0\n",
            "infectious\t1\n",
            "spondylodiscitis\t2\n",
            "D10\t2\n",
            "-\t2\n",
            "D11\t2\n",
            ".\t0\n",
            "Three\t0\n",
            "serial\t0\n",
            "blood\t0\n",
            "cultures\t0\n",
            "were\t0\n",
            "negative\t0\n",
            ".\t0\n",
            "Samples\t0\n",
            "obtained\t0\n",
            "by\t0\n",
            "aspiration\t0\n",
            "of\t0\n",
            "the\t0\n",
            "D10\t0\n",
            "-\t0\n",
            "D11\t0\n",
            "space\t0\n",
            "showed\t0\n",
            "gram\t0\n",
            "-\t0\n",
            "positive\t0\n",
            "cocci\t0\n",
            "chains\t0\n",
            ",\t0\n",
            "which\t0\n",
            "were\t0\n",
            "subsequently\t0\n",
            "recovered\t0\n",
            "and\t0\n",
            "typed\t0\n",
            "as\t0\n",
            "penicillin\t0\n",
            "-\t0\n",
            "sensitive\t0\n",
            "Streptococcus\t0\n",
            "pneumoniae\t0\n",
            ".\t0\n",
            "Pleural\t0\n",
            "fluid\t0\n",
            "analysis\t0\n",
            "showed\t0\n",
            "pH\t0\n",
            ":\t0\n",
            "7.55\t0\n",
            ";\t0\n",
            "leucocytes\t0\n",
            ":\t0\n",
            "8.4\t0\n",
            "x\t0\n",
            "109\t0\n",
            "/\t0\n",
            "L\t0\n",
            "(\t0\n",
            "58\t0\n",
            "%\t0\n",
            "neutrophils\t0\n",
            ",\t0\n",
            "26\t0\n",
            "%\t0\n",
            "eosinophils\t0\n",
            ",\t0\n",
            "16\t0\n",
            "%\t0\n",
            "lymphocytes\t0\n",
            ")\t0\n",
            ",\t0\n",
            "protein\t0\n",
            ":\t0\n",
            "48\t0\n",
            "g\t0\n",
            "/\t0\n",
            "L\t0\n",
            "(\t0\n",
            "ratio\t0\n",
            "to\t0\n",
            "serum\t0\n",
            "protein\t0\n",
            ":\t0\n",
            "0.65\t0\n",
            ")\t0\n",
            ",\t0\n",
            "glucose\t0\n",
            ":\t0\n",
            "125\t0\n",
            "mg\t0\n",
            "/\t0\n",
            "dl\t0\n",
            ",\t0\n",
            "ADA\t0\n",
            ":\t0\n",
            "25.92\t0\n",
            "IU\t0\n",
            "/\t0\n",
            "ml\t0\n",
            ",\t0\n",
            "LDH\t0\n",
            ":\t0\n",
            "362\t0\n",
            "U\t0\n",
            "/\t0\n",
            "L\t0\n",
            "(\t0\n",
            "pleural\t1\n",
            "LDH\t0\n",
            "/\t0\n",
            "serum\t0\n",
            "LDH\t0\n",
            "ratio\t0\n",
            ":\t0\n",
            "0.8\t0\n",
            ")\t0\n",
            ".\t0\n",
            "Both\t0\n",
            "auramine\t0\n",
            "-\t0\n",
            "rhodamine\t0\n",
            "staining\t0\n",
            "and\t0\n",
            "Löwenstein\t0\n",
            "-\t0\n",
            "Jensen\t0\n",
            "medium\t0\n",
            "culture\t0\n",
            "of\t0\n",
            "pleural\t1\n",
            "fluid\t0\n",
            "were\t0\n",
            "negative\t0\n",
            "and\t0\n",
            "cytology\t0\n",
            "showed\t0\n",
            "no\t0\n",
            "evidence\t0\n",
            "of\t0\n",
            "neoplastic\t0\n",
            "cells\t0\n",
            ".\t0\n",
            "\n",
            "\n",
            "\t0\n",
            "The\t0\n",
            "patient\t0\n",
            "was\t0\n",
            "initially\t0\n",
            "treated\t0\n",
            "intravenously\t0\n",
            "with\t0\n",
            "amoxicillin\t0\n",
            "+\t0\n",
            "clavulanic\t0\n",
            "acid\t0\n",
            "(\t0\n",
            "1\t0\n",
            "g\t0\n",
            "/\t0\n",
            "200\t0\n",
            "mg\t0\n",
            ",\t0\n",
            "every\t0\n",
            "8\t0\n",
            "hours\t0\n",
            ")\t0\n",
            ".\t0\n",
            "After\t0\n",
            "21\t0\n",
            "days\t0\n",
            ",\t0\n",
            "she\t0\n",
            "was\t0\n",
            "switched\t0\n",
            "to\t0\n",
            "the\t0\n",
            "oral\t0\n",
            "route\t0\n",
            "(\t0\n",
            "875\t0\n",
            "/\t0\n",
            "125\t0\n",
            "mg\t0\n",
            ",\t0\n",
            "every\t0\n",
            "8\t0\n",
            "hours\t0\n",
            ")\t0\n",
            "for\t0\n",
            "6\t0\n",
            "weeks\t0\n",
            ".\t0\n",
            "The\t0\n",
            "evolution\t0\n",
            "was\t0\n",
            "favourable\t0\n",
            "and\t0\n",
            "she\t0\n",
            "was\t0\n",
            "able\t0\n",
            "to\t0\n",
            "start\t0\n",
            "walking\t0\n",
            "with\t0\n",
            "a\t0\n",
            "dorsolumbar\t0\n",
            "corset\t0\n",
            "after\t0\n",
            "the\t0\n",
            "fourth\t0\n",
            "week\t0\n",
            ".\t0\n",
            "One\t0\n",
            "month\t0\n",
            "after\t0\n",
            "the\t0\n",
            "end\t0\n",
            "of\t0\n",
            "antibiotic\t0\n",
            "therapy\t0\n",
            ",\t0\n",
            "a\t0\n",
            "control\t0\n",
            "chest\t0\n",
            "CT\t0\n",
            "scan\t0\n",
            "still\t0\n",
            "showed\t0\n",
            "a\t0\n",
            "discrete\t0\n",
            "pleural\t1\n",
            "effusion\t2\n",
            ",\t0\n",
            "but\t0\n",
            "the\t0\n",
            "patient\t0\n",
            "had\t0\n",
            "only\t0\n",
            "mild\t0\n",
            "mechanical\t0\n",
            "dorsalgia\t0\n",
            ",\t0\n",
            "her\t0\n",
            "ESR\t0\n",
            "had\t0\n",
            "decreased\t0\n",
            "to\t0\n",
            "21\t0\n",
            "mm\t0\n",
            "/\t0\n",
            "1\t0\n",
            "h\t0\n",
            "and\t0\n",
            "her\t0\n",
            "CRP\t0\n",
            "was\t0\n",
            "2.4\t0\n",
            "mg\t0\n",
            "/\t0\n",
            "L.\t0\n",
            "Outpatient\t0\n",
            "follow\t0\n",
            "-\t0\n",
            "up\t0\n",
            "continued\t0\n",
            "for\t0\n",
            "a\t0\n",
            "further\t0\n",
            "three\t0\n",
            "years\t0\n",
            ",\t0\n",
            "during\t0\n",
            "which\t0\n",
            "time\t0\n",
            "the\t0\n",
            "evolution\t0\n",
            "was\t0\n",
            "favourable\t0\n",
            "and\t0\n",
            "a\t0\n",
            "D10\t0\n",
            "-\t0\n",
            "D11\t0\n",
            "vertebral\t0\n",
            "block\t0\n",
            "was\t0\n",
            "formed\t0\n",
            ".\t0\n",
            "\n",
            "\n",
            "\n",
            "\t0\n"
          ]
        }
      ],
      "source": [
        "j = 0\n",
        "for i in range(len(HCs_tokenized[j])):\n",
        "  print(str(HCs_tokenized[j][i]) + \"\\t\" + str(labels_tokenized[j][i]))"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Validating tokenization and alignment with the BIO tags."
      ],
      "metadata": {
        "id": "laU64q79UYZy"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 22,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jcJBsudYKpLc",
        "outputId": "13ea2860-19b7-4ee1-ddf4-ce607a60e80f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Everything is aligned!\n"
          ]
        }
      ],
      "source": [
        "flag = 0\n",
        "for st, lt in zip(HCs_tokenized, labels_tokenized):\n",
        "    if len(st) != len(lt):\n",
        "        print(st)\n",
        "        print(lt)\n",
        "        flag = 1\n",
        "if flag==0:\n",
        "    print(\"Everything is aligned!\")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Sentence tokenization"
      ],
      "metadata": {
        "id": "lvR3V8qXUbvR"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 23,
      "metadata": {
        "id": "-nHHwKmUKpLd"
      },
      "outputs": [],
      "source": [
        "sent_tokenized = []\n",
        "label_sent_tokenized = []\n",
        "for ht, lht in zip(HCs_tokenized, labels_tokenized):\n",
        "  st = []; lbst = []\n",
        "  for h, l in zip(ht,lht):\n",
        "    if h != \".\":\n",
        "      st.append(h)\n",
        "      lbst.append(l)\n",
        "    else:\n",
        "      st.append(\".\")\n",
        "      lbst.append(0)\n",
        "      sent_tokenized.append(st)\n",
        "      label_sent_tokenized.append(lbst)\n",
        "      st = []; lbst = []"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 24,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8Lo6zzhGKpLd",
        "outputId": "6d68e803-7ad4-4970-b4bf-b34be7fbdd37"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "11668"
            ]
          },
          "metadata": {},
          "execution_count": 24
        }
      ],
      "source": [
        "len(sent_tokenized)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 25,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "5Ei7TrATKpLd",
        "outputId": "93b623a3-ac12-4500-b6f4-506972dcad9f"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['A',\n",
              " '73',\n",
              " '-',\n",
              " 'year',\n",
              " '-',\n",
              " 'old',\n",
              " 'patient',\n",
              " 'with',\n",
              " 'a',\n",
              " 'history',\n",
              " 'of',\n",
              " 'arterial',\n",
              " 'hypertension',\n",
              " 'and',\n",
              " 'polyarthrosis',\n",
              " 'presented',\n",
              " 'to',\n",
              " 'the',\n",
              " 'emergency',\n",
              " 'department',\n",
              " 'with',\n",
              " 'abdominal',\n",
              " 'distension',\n",
              " 'and',\n",
              " 'pain',\n",
              " 'associated',\n",
              " 'with',\n",
              " 'constipation',\n",
              " 'and',\n",
              " 'febrile',\n",
              " 'fever',\n",
              " '.']"
            ]
          },
          "metadata": {},
          "execution_count": 25
        }
      ],
      "source": [
        "sent_tokenized[0]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 26,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "KistaXEmKpLe",
        "outputId": "b07ce5d3-df79-474e-a7fb-983632bb38d3"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "11668"
            ]
          },
          "metadata": {},
          "execution_count": 26
        }
      ],
      "source": [
        "len(label_sent_tokenized)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 27,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "m9qR1AxbKpLe",
        "outputId": "66600bb9-dcae-4717-9cc9-9422115b4e2e"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 1,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0]"
            ]
          },
          "metadata": {},
          "execution_count": 27
        }
      ],
      "source": [
        "label_sent_tokenized[0]"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Disease mentions identification as a Token classification problem"
      ],
      "metadata": {
        "id": "l5-_fyqPUh3b"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Building the Dataset"
      ],
      "metadata": {
        "id": "098UDE8VUjXf"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Case as a whole is given as input"
      ],
      "metadata": {
        "id": "8FT7wAx4VeaU"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 28,
      "metadata": {
        "id": "5qVPJ2LVKpLe"
      },
      "outputs": [],
      "source": [
        "dic = {\"tokens\": HCs_tokenized, \"ner_tags\": labels_tokenized} #For the whole clinical case. We used this option for our paper.\n",
        "#dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 29,
      "metadata": {
        "id": "UzXp6PO2KpLe"
      },
      "outputs": [],
      "source": [
        "dataset = Dataset.from_dict(dic)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 30,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NhqH78gAKpLf",
        "outputId": "e3fe5c5f-764c-4bf8-c599-6bda653a4944"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 741\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 30
        }
      ],
      "source": [
        "dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 31,
      "metadata": {
        "id": "jspsCq2OKpLf"
      },
      "outputs": [],
      "source": [
        "#For training, validation, and test partitions\n",
        "\"\"\"\n",
        "#Train, val, test partitions\n",
        "train_test = dataset.train_test_split()\n",
        "test_val = train_test['test'].train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': test_val['train'],\n",
        "    'test': test_val['test']\n",
        "    })\n",
        "\"\"\"\n",
        "\n",
        "#Just for training and validation partitions\n",
        "train_test = dataset.train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': train_test['test']\n",
        "    })"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 32,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "isqyq9VIKpLf",
        "outputId": "7539fb10-1ab0-43cd-d7a5-bdb30514b891"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "DatasetDict({\n",
              "    train: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 555\n",
              "    })\n",
              "    validation: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 186\n",
              "    })\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 32
        }
      ],
      "source": [
        "raw_datasets"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 33,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pEJhTKi8KpLf",
        "outputId": "fe3f3c2b-226c-42eb-9674-f69fce8a61e9"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0]"
            ]
          },
          "metadata": {},
          "execution_count": 33
        }
      ],
      "source": [
        "raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"chunk_tags\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 34,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gtrDybrCKpLf",
        "outputId": "1217e93b-928c-4db1-bd22-8f1e3c2ebf2c"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 555\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 34
        }
      ],
      "source": [
        "raw_datasets['train']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 35,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "QjdG8yi0KpLf",
        "outputId": "20bc5bed-36bf-4e05-f80b-f75638f5196b"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['O', 'B', 'I']"
            ]
          },
          "metadata": {},
          "execution_count": 35
        }
      ],
      "source": [
        "label_names = ['O','B','I']\n",
        "label_names"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 36,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9kGXdiCsKpLg",
        "outputId": "754c7613-c7c3-46fc-a109-d8a67d6ab072"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "This is a 70 - year - old male patient , who was admitted to the emergency department of the Hospital Pablo Tobón Uribe , with symptoms of approximately one hour of evolution consisting of chest tightness , general malaise , asthenia and diaphoresis ; which began after having ingested 100 mg of sildenafil , denies ingestion of another sexual stimulant or cocaine and without sexual intercourse after its consumption . The patient 's only clinical history was arterial hypertension , pharmacologically controlled , and he denies previous episodes of angina or nitrate consumption . The clinical examination and vital signs were normal ; however , after the initial assessment he presented cardiorespiratory arrest secondary to ventricular fibrillation with response to a single defibrillation of 200 joules . \n",
            " The initial electrocardiogram showed ST - segment elevation in the inferior ( II , III and aVF ) and anterior ( V2 - V4 ) leads with reciprocal changes in aVL , with no electrocardiographic extension to the right ventricle . \n",
            "\n",
            " Cardiac enzymes on admission revealed a creatine kinase ( CK ) of 170 and a creatine phosphokinase - MB fraction ( CK - MB ) of 6 . Electrolytes , coagulation tests and blood cell counts were normal . \n",
            " Initial management was with aspirin 100 mg , lovastatin 40 mg daily , metoprolol 25 mg every 12 hours , enoxaparin 60 mg every 12 hours , oxygen at 3 lt / min and streptokinase 1'500,000 units administered over 30 minutes . No changes secondary to reperfusion were demonstrated . The patient was transferred to the intensive care unit , where episodes of complete A - V block with spontaneous resolution were documented during the first hours of evolution . \n",
            " The EKG taken at 24 hours of evolution revealed QS in the inferior face and a late progression of the R wave in the anterior face . Enzyme monitoring showed increased CK and MB fraction at 6 hours ( 4476 and 165 ) and 12 hours ( 3839 and 136 ) . \n",
            " The next day coronary angiography showed diffuse disease of the anterior descending artery with 50 % lesion in the distal third and 40 % lesion in the proximal third of the first diagonal branch . The circumflex artery had a 50 % lesion in the middle third and diffuse disease of its obtuse marginal branches . The right coronary artery had an irregular lesion suggestive of a partially resolved thrombus producing a maximum stenosis of 50 % ; distally the posterior descending artery had two 40 % lesions . \n",
            "\n",
            " The patient evolved satisfactorily without further complications and pain - free . He was discharged for outpatient follow - up . \n",
            "\n",
            "\n",
            " \n",
            "O    O  O O  O O    O O   O    O       O O   O   O        O  O   O         O          O  O   O        O     O     O     O O    O        O  O             O   O    O  O         O          O  O     O         O O       O       O O        O   O           O O     O     O     O      O        O   O  O  O          O O      O         O  O       O      O         O  O       O   O       O      O           O     O   O           O O   O       O  O    O        O       O   B        I            O O                 O          O O   O  O      O        O        O  B      O  O       O           O O   O        O           O   O     O     O    O      O O       O O     O   O       O          O  O         B                 I      I         I  I           I            O    O        O  O O      O              O  O   O      O O O   O       O                 O      O  O O       O         O  O   O        O O  O O   O   O   O O   O        O O  O O  O O     O    O          O       O  O   O O    O  O                    O         O  O   O     O         O O  O       O       O  O         O        O O        O      O O  O O  O   O   O O        O             O O  O        O O  O O  O O  O O O            O O           O     O   O     O    O      O    O      O O O       O          O   O    O       O   O  O O          O  O  O     O O          O  O  O     O  O     O O          O  O  O     O  O     O O      O  O O  O O   O   O             O         O     O            O    O  O       O O  O       O         O  O           O    O            O O   O       O   O           O  O   O         O    O    O O     O        O  B        I I I I     O    O           O          O    O          O      O   O     O     O  O         O O O   O   O     O  O  O     O  O         O        O  O  O   O        O    O   O O    O           O  O   O O    O  O   O        O    O O      O          O      O         O  O   O  O        O  O O     O O    O   O   O O   O  O     O O    O   O   O O O O   O    O   O        O           O      O       O       O  O   O        O          O      O    O  O O      O  O   O      O     O   O  O O      O  O   O        O     O  O   O     O        O      O O   B          I      I   I I  I I      I  I   I      I     I   I       I       I  I   I      I        I        O O   O     O        O      O   O  O         O      O          O  O O         O        O        O         O O       O        O  O  O O O        O   O         O          O      O   O   O  O O       O O  O   O       O       O              O       O       O             O   O    O O    O O  O   O          O   O          O      O O  O O   \n"
          ]
        }
      ],
      "source": [
        "words = raw_datasets[\"train\"][0][\"tokens\"]\n",
        "labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
        "#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
        "line1 = \"\"\n",
        "line2 = \"\"\n",
        "for word, label in zip(words, labels):\n",
        "    full_label = label_names[label]\n",
        "    max_length = max(len(word), len(full_label))\n",
        "    line1 += word + \" \" * (max_length - len(word) + 1)\n",
        "    line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
        "\n",
        "print(line1)\n",
        "print(line2)"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Helper Functions"
      ],
      "metadata": {
        "id": "0ABWC5YnTZzp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def align_labels_with_tokens(labels, word_ids):\n",
        "    new_labels = []\n",
        "    current_word = None\n",
        "    for word_id in word_ids:\n",
        "        if word_id != current_word:\n",
        "            # Start of a new word!\n",
        "            current_word = word_id\n",
        "            label = -100 if word_id is None else labels[word_id]\n",
        "            new_labels.append(label)\n",
        "        elif word_id is None:\n",
        "            # Special token\n",
        "            new_labels.append(-100)\n",
        "        else:\n",
        "            # Same word as previous token\n",
        "            label = labels[word_id]\n",
        "            # If the label is B-XXX we change it to I-XXX\n",
        "            if label % 2 == 1:\n",
        "                label += 1\n",
        "            new_labels.append(label)\n",
        "\n",
        "    return new_labels"
      ],
      "metadata": {
        "id": "LQUKQDvxTNHn"
      },
      "execution_count": 37,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def tokenize_and_align_labels(examples):\n",
        "    tokenized_inputs = tokenizer(\n",
        "        examples[\"tokens\"], truncation=True, is_split_into_words=True\n",
        "    )\n",
        "    all_labels = examples[\"ner_tags\"]\n",
        "    new_labels = []\n",
        "    for i, labels in enumerate(all_labels):\n",
        "        word_ids = tokenized_inputs.word_ids(i)\n",
        "        new_labels.append(align_labels_with_tokens(labels, word_ids))\n",
        "\n",
        "    tokenized_inputs[\"labels\"] = new_labels\n",
        "    return tokenized_inputs"
      ],
      "metadata": {
        "id": "qT-6oXUoTYah"
      },
      "execution_count": 38,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "metric = load_metric(\"seqeval\")\n",
        "def compute_metrics(eval_preds):\n",
        "    logits, labels = eval_preds\n",
        "    predictions = np.argmax(logits, axis=-1)\n",
        "\n",
        "    # Remove ignored index (special tokens) and convert to labels\n",
        "    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]\n",
        "    try:\n",
        "      true_predictions = [\n",
        "        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]\n",
        "        for prediction, label in zip(predictions, labels)\n",
        "    ]\n",
        "    except:\n",
        "      true_predictions = []\n",
        "      for prediction, label in zip(predictions, labels):\n",
        "        label_list = []\n",
        "        for (p, l) in zip(prediction, label):\n",
        "          if l != -100:\n",
        "            if p not in range(len(label_names)):\n",
        "              p = 0\n",
        "          \n",
        "            label_list.append(label_names[p])\n",
        "        true_predictions.append(label_list)\n",
        "\n",
        "    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)\n",
        "    return {\n",
        "        \"precision\": all_metrics[\"overall_precision\"],\n",
        "        \"recall\": all_metrics[\"overall_recall\"],\n",
        "        \"f1\": all_metrics[\"overall_f1\"],\n",
        "        \"accuracy\": all_metrics[\"overall_accuracy\"],\n",
        "    }"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 104,
          "referenced_widgets": [
            "b3fe12ca95e84b198d16bdb4d20f9ad9",
            "1b7f8f1786394c01bad4a8589ad16513",
            "70e437b3ba294189b4799c6607532ebd",
            "0fb47d91dbf9497cac1ffc1c5dfd4519",
            "9cfec0f21c0a459f9f5888c389a6a479",
            "ef66098fb5f748eabe11abc3fe4ad54d",
            "563d8b35192240be960bc08909984119",
            "b94385d1423e47f5a9e2351bf873c3e0",
            "e1b6e7774bc94a87ad23fb53d6c9b985",
            "a0b523772cf04a85b0ac000cc9a83c67",
            "71a3f1b2112344ea81721e59cce14cec"
          ]
        },
        "id": "EZMRI9ATTzc9",
        "outputId": "14e38b3a-86a7-49b1-817f-92a7304747a2"
      },
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n",
            "  \"\"\"Entry point for launching an IPython kernel.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "b3fe12ca95e84b198d16bdb4d20f9ad9"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Loading BERT as a pre-trained model"
      ],
      "metadata": {
        "id": "6D0P7PztVnCP"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Model 1 - d4data/biomedical-ner-all\n",
        "\n",
        "Complete document wise tokenization"
      ],
      "metadata": {
        "id": "mmPReh2mSux9"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model_checkpoint = \"d4data/biomedical-ner-all\"\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 145,
          "referenced_widgets": [
            "a275c54cbefb4438a3015080e8b57999",
            "a4696c167a3247bd8fd0727e0556463a",
            "afa79c37c031491da9e229c637d80cc4",
            "5ddf799b2fd94edc9949d36450a2d5e9",
            "592af11564074af19e40bce6680ed7f1",
            "125b61b8e80d4192a6f19d43ba4797dc",
            "d9ff16ada2d94eb7a1adc70e5265ff2d",
            "f9bd10de9e2845f08100a29293b92d1c",
            "c28349dbeccc4124b583e0eeec004e6b",
            "ff2cf349b07442bd9812dd8c7e82e59b",
            "414d466fed0b42378d8b38f10c720eba",
            "e432f1e3e5c54358a321a21e9c7aad1f",
            "cf931d70dc1a4d2ba5f10dba7bf90ece",
            "58f1edc459ef4f5bab25544474897db3",
            "8894005504364c36964d283cf58bb223",
            "78db41a453ce4ff4884960c615147331",
            "fe8d877f0fc1417baad9838094045475",
            "74bfdb85ed55436f8c12bf9b25375533",
            "e813e2a1cb7248b7a8c404d55e4fb248",
            "cb3c438fb3a6412d80b5ba673a6455cb",
            "bb189f5bc189462cad4824a1c30335c0",
            "e98b7218049f4310951a1608c52c14e0",
            "3dd1f27ff0d24a1294534ff7e69a7abb",
            "4128d82e19f14e9d9be5416ebc974d0d",
            "6676a80dc293456ea7aed4ce3e281d83",
            "55a7a4c336884f26a53292d559a06ff8",
            "d27469698b1e4ad1ae74ced6f7c3942d",
            "d201490a05c049d38b087008aac0a400",
            "703d715a4ef64c4e93cc6496f5340451",
            "41d861058e3e458e949f1f3d92623217",
            "f4d9343bd31d47b1b3dcf0494825be2d",
            "b75e2a47db2b47dd8740f77b337c308f",
            "d8849516ccb44011a7f9e7e745b30c60",
            "ae5928c8da4243fba06ae9bf5086ba31",
            "e613455bcbb24e36a31666acd83d7b24",
            "bac45a33f9b444a1985ef56a9be85c52",
            "91ac0673e600400f904b1b10deb86cee",
            "289f23dd30814993afde0f5e987fdd9e",
            "8e72912c0e434060ac30517a98d07a9e",
            "ede252ab2cee4ffbbc2f5519373d1e97",
            "f2da21cc1007475ca0233a9e5d146d65",
            "92fd7c43f87142d1bbd05f89ba3bfe39",
            "b58c9bfa30b3421496adb52e082cb50a",
            "6cbfa925d26e47139365d10b9b28d96a"
          ]
        },
        "id": "RfwVBXFAS1Dc",
        "outputId": "3a7c9806-3828-4ab7-c73d-d0e8039e1417"
      },
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/373 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "a275c54cbefb4438a3015080e8b57999"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "e432f1e3e5c54358a321a21e9c7aad1f"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/711k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "3dd1f27ff0d24a1294534ff7e69a7abb"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/125 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "ae5928c8da4243fba06ae9bf5086ba31"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "tokenizer.is_fast"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "a9DegC0yS_Gp",
        "outputId": "083e5505-007e-43c8-fe31-3b202c84cc76"
      },
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 41
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
        "inputs.tokens()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "v-7VQWw5TB-c",
        "outputId": "0922ac3e-61af-43ae-f3a8-29f291e7c19b"
      },
      "execution_count": 42,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Token indices sequence length is longer than the specified maximum sequence length for this model (567 > 512). Running this sequence through the model will result in indexing errors\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['[CLS]',\n",
              " 'this',\n",
              " 'is',\n",
              " 'a',\n",
              " '70',\n",
              " '-',\n",
              " 'year',\n",
              " '-',\n",
              " 'old',\n",
              " 'male',\n",
              " 'patient',\n",
              " ',',\n",
              " 'who',\n",
              " 'was',\n",
              " 'admitted',\n",
              " 'to',\n",
              " 'the',\n",
              " 'emergency',\n",
              " 'department',\n",
              " 'of',\n",
              " 'the',\n",
              " 'hospital',\n",
              " 'pablo',\n",
              " 'to',\n",
              " '##bon',\n",
              " 'ur',\n",
              " '##ibe',\n",
              " ',',\n",
              " 'with',\n",
              " 'symptoms',\n",
              " 'of',\n",
              " 'approximately',\n",
              " 'one',\n",
              " 'hour',\n",
              " 'of',\n",
              " 'evolution',\n",
              " 'consisting',\n",
              " 'of',\n",
              " 'chest',\n",
              " 'tight',\n",
              " '##ness',\n",
              " ',',\n",
              " 'general',\n",
              " 'mala',\n",
              " '##ise',\n",
              " ',',\n",
              " 'as',\n",
              " '##the',\n",
              " '##nia',\n",
              " 'and',\n",
              " 'dia',\n",
              " '##ph',\n",
              " '##ores',\n",
              " '##is',\n",
              " ';',\n",
              " 'which',\n",
              " 'began',\n",
              " 'after',\n",
              " 'having',\n",
              " 'ing',\n",
              " '##ested',\n",
              " '100',\n",
              " 'mg',\n",
              " 'of',\n",
              " 'si',\n",
              " '##lden',\n",
              " '##af',\n",
              " '##il',\n",
              " ',',\n",
              " 'denies',\n",
              " 'ing',\n",
              " '##est',\n",
              " '##ion',\n",
              " 'of',\n",
              " 'another',\n",
              " 'sexual',\n",
              " 'st',\n",
              " '##im',\n",
              " '##ula',\n",
              " '##nt',\n",
              " 'or',\n",
              " 'cocaine',\n",
              " 'and',\n",
              " 'without',\n",
              " 'sexual',\n",
              " 'intercourse',\n",
              " 'after',\n",
              " 'its',\n",
              " 'consumption',\n",
              " '.',\n",
              " 'the',\n",
              " 'patient',\n",
              " \"'\",\n",
              " 's',\n",
              " 'only',\n",
              " 'clinical',\n",
              " 'history',\n",
              " 'was',\n",
              " 'arterial',\n",
              " 'hyper',\n",
              " '##tension',\n",
              " ',',\n",
              " 'ph',\n",
              " '##arm',\n",
              " '##aco',\n",
              " '##logical',\n",
              " '##ly',\n",
              " 'controlled',\n",
              " ',',\n",
              " 'and',\n",
              " 'he',\n",
              " 'denies',\n",
              " 'previous',\n",
              " 'episodes',\n",
              " 'of',\n",
              " 'ang',\n",
              " '##ina',\n",
              " 'or',\n",
              " 'nitrate',\n",
              " 'consumption',\n",
              " '.',\n",
              " 'the',\n",
              " 'clinical',\n",
              " 'examination',\n",
              " 'and',\n",
              " 'vital',\n",
              " 'signs',\n",
              " 'were',\n",
              " 'normal',\n",
              " ';',\n",
              " 'however',\n",
              " ',',\n",
              " 'after',\n",
              " 'the',\n",
              " 'initial',\n",
              " 'assessment',\n",
              " 'he',\n",
              " 'presented',\n",
              " 'card',\n",
              " '##ior',\n",
              " '##es',\n",
              " '##pi',\n",
              " '##rator',\n",
              " '##y',\n",
              " 'arrest',\n",
              " 'secondary',\n",
              " 'to',\n",
              " 'vent',\n",
              " '##ric',\n",
              " '##ular',\n",
              " 'fi',\n",
              " '##bri',\n",
              " '##llation',\n",
              " 'with',\n",
              " 'response',\n",
              " 'to',\n",
              " 'a',\n",
              " 'single',\n",
              " 'def',\n",
              " '##ib',\n",
              " '##rill',\n",
              " '##ation',\n",
              " 'of',\n",
              " '200',\n",
              " 'jo',\n",
              " '##ules',\n",
              " '.',\n",
              " 'the',\n",
              " 'initial',\n",
              " 'electro',\n",
              " '##card',\n",
              " '##io',\n",
              " '##gram',\n",
              " 'showed',\n",
              " 'st',\n",
              " '-',\n",
              " 'segment',\n",
              " 'elevation',\n",
              " 'in',\n",
              " 'the',\n",
              " 'inferior',\n",
              " '(',\n",
              " 'ii',\n",
              " ',',\n",
              " 'iii',\n",
              " 'and',\n",
              " 'av',\n",
              " '##f',\n",
              " ')',\n",
              " 'and',\n",
              " 'anterior',\n",
              " '(',\n",
              " 'v',\n",
              " '##2',\n",
              " '-',\n",
              " 'v',\n",
              " '##4',\n",
              " ')',\n",
              " 'leads',\n",
              " 'with',\n",
              " 'reciprocal',\n",
              " 'changes',\n",
              " 'in',\n",
              " 'av',\n",
              " '##l',\n",
              " ',',\n",
              " 'with',\n",
              " 'no',\n",
              " 'electro',\n",
              " '##card',\n",
              " '##io',\n",
              " '##graphic',\n",
              " 'extension',\n",
              " 'to',\n",
              " 'the',\n",
              " 'right',\n",
              " 'vent',\n",
              " '##ric',\n",
              " '##le',\n",
              " '.',\n",
              " 'cardiac',\n",
              " 'enzymes',\n",
              " 'on',\n",
              " 'admission',\n",
              " 'revealed',\n",
              " 'a',\n",
              " 'cr',\n",
              " '##ea',\n",
              " '##tine',\n",
              " 'kinase',\n",
              " '(',\n",
              " 'ck',\n",
              " ')',\n",
              " 'of',\n",
              " '170',\n",
              " 'and',\n",
              " 'a',\n",
              " 'cr',\n",
              " '##ea',\n",
              " '##tine',\n",
              " 'ph',\n",
              " '##os',\n",
              " '##ph',\n",
              " '##oki',\n",
              " '##nas',\n",
              " '##e',\n",
              " '-',\n",
              " 'mb',\n",
              " 'fraction',\n",
              " '(',\n",
              " 'ck',\n",
              " '-',\n",
              " 'mb',\n",
              " ')',\n",
              " 'of',\n",
              " '6',\n",
              " '.',\n",
              " 'electro',\n",
              " '##ly',\n",
              " '##tes',\n",
              " ',',\n",
              " 'coa',\n",
              " '##gul',\n",
              " '##ation',\n",
              " 'tests',\n",
              " 'and',\n",
              " 'blood',\n",
              " 'cell',\n",
              " 'counts',\n",
              " 'were',\n",
              " 'normal',\n",
              " '.',\n",
              " 'initial',\n",
              " 'management',\n",
              " 'was',\n",
              " 'with',\n",
              " 'as',\n",
              " '##pi',\n",
              " '##rin',\n",
              " '100',\n",
              " 'mg',\n",
              " ',',\n",
              " 'lo',\n",
              " '##vas',\n",
              " '##tat',\n",
              " '##in',\n",
              " '40',\n",
              " 'mg',\n",
              " 'daily',\n",
              " ',',\n",
              " 'met',\n",
              " '##op',\n",
              " '##rol',\n",
              " '##ol',\n",
              " '25',\n",
              " 'mg',\n",
              " 'every',\n",
              " '12',\n",
              " 'hours',\n",
              " ',',\n",
              " 'en',\n",
              " '##ox',\n",
              " '##apa',\n",
              " '##rin',\n",
              " '60',\n",
              " 'mg',\n",
              " 'every',\n",
              " '12',\n",
              " 'hours',\n",
              " ',',\n",
              " 'oxygen',\n",
              " 'at',\n",
              " '3',\n",
              " 'lt',\n",
              " '/',\n",
              " 'min',\n",
              " 'and',\n",
              " 'st',\n",
              " '##re',\n",
              " '##pt',\n",
              " '##oki',\n",
              " '##nas',\n",
              " '##e',\n",
              " '1',\n",
              " \"'\",\n",
              " '500',\n",
              " ',',\n",
              " '000',\n",
              " 'units',\n",
              " 'administered',\n",
              " 'over',\n",
              " '30',\n",
              " 'minutes',\n",
              " '.',\n",
              " 'no',\n",
              " 'changes',\n",
              " 'secondary',\n",
              " 'to',\n",
              " 'rep',\n",
              " '##er',\n",
              " '##fusion',\n",
              " 'were',\n",
              " 'demonstrated',\n",
              " '.',\n",
              " 'the',\n",
              " 'patient',\n",
              " 'was',\n",
              " 'transferred',\n",
              " 'to',\n",
              " 'the',\n",
              " 'intensive',\n",
              " 'care',\n",
              " 'unit',\n",
              " ',',\n",
              " 'where',\n",
              " 'episodes',\n",
              " 'of',\n",
              " 'complete',\n",
              " 'a',\n",
              " '-',\n",
              " 'v',\n",
              " 'block',\n",
              " 'with',\n",
              " 'spontaneous',\n",
              " 'resolution',\n",
              " 'were',\n",
              " 'documented',\n",
              " 'during',\n",
              " 'the',\n",
              " 'first',\n",
              " 'hours',\n",
              " 'of',\n",
              " 'evolution',\n",
              " '.',\n",
              " 'the',\n",
              " 'ek',\n",
              " '##g',\n",
              " 'taken',\n",
              " 'at',\n",
              " '24',\n",
              " 'hours',\n",
              " 'of',\n",
              " 'evolution',\n",
              " 'revealed',\n",
              " 'q',\n",
              " '##s',\n",
              " 'in',\n",
              " 'the',\n",
              " 'inferior',\n",
              " 'face',\n",
              " 'and',\n",
              " 'a',\n",
              " 'late',\n",
              " 'progression',\n",
              " 'of',\n",
              " 'the',\n",
              " 'r',\n",
              " 'wave',\n",
              " 'in',\n",
              " 'the',\n",
              " 'anterior',\n",
              " 'face',\n",
              " '.',\n",
              " 'enzyme',\n",
              " 'monitoring',\n",
              " 'showed',\n",
              " 'increased',\n",
              " 'ck',\n",
              " 'and',\n",
              " 'mb',\n",
              " 'fraction',\n",
              " 'at',\n",
              " '6',\n",
              " 'hours',\n",
              " '(',\n",
              " '44',\n",
              " '##7',\n",
              " '##6',\n",
              " 'and',\n",
              " '165',\n",
              " ')',\n",
              " 'and',\n",
              " '12',\n",
              " 'hours',\n",
              " '(',\n",
              " '38',\n",
              " '##39',\n",
              " 'and',\n",
              " '136',\n",
              " ')',\n",
              " '.',\n",
              " 'the',\n",
              " 'next',\n",
              " 'day',\n",
              " 'corona',\n",
              " '##ry',\n",
              " 'ang',\n",
              " '##iography',\n",
              " 'showed',\n",
              " 'diffuse',\n",
              " 'disease',\n",
              " 'of',\n",
              " 'the',\n",
              " 'anterior',\n",
              " 'descending',\n",
              " 'artery',\n",
              " 'with',\n",
              " '50',\n",
              " '%',\n",
              " 'les',\n",
              " '##ion',\n",
              " 'in',\n",
              " 'the',\n",
              " 'distal',\n",
              " 'third',\n",
              " 'and',\n",
              " '40',\n",
              " '%',\n",
              " 'les',\n",
              " '##ion',\n",
              " 'in',\n",
              " 'the',\n",
              " 'pro',\n",
              " '##xi',\n",
              " '##mal',\n",
              " 'third',\n",
              " 'of',\n",
              " 'the',\n",
              " 'first',\n",
              " 'diagonal',\n",
              " 'branch',\n",
              " '.',\n",
              " 'the',\n",
              " 'ci',\n",
              " '##rc',\n",
              " '##um',\n",
              " '##fle',\n",
              " '##x',\n",
              " 'artery',\n",
              " 'had',\n",
              " 'a',\n",
              " '50',\n",
              " '%',\n",
              " 'les',\n",
              " '##ion',\n",
              " 'in',\n",
              " 'the',\n",
              " 'middle',\n",
              " 'third',\n",
              " 'and',\n",
              " 'diffuse',\n",
              " 'disease',\n",
              " 'of',\n",
              " 'its',\n",
              " 'ob',\n",
              " '##tus',\n",
              " '##e',\n",
              " 'marginal',\n",
              " 'branches',\n",
              " '.',\n",
              " 'the',\n",
              " 'right',\n",
              " 'corona',\n",
              " '##ry',\n",
              " 'artery',\n",
              " 'had',\n",
              " 'an',\n",
              " 'irregular',\n",
              " 'les',\n",
              " '##ion',\n",
              " 'suggest',\n",
              " '##ive',\n",
              " 'of',\n",
              " 'a',\n",
              " 'partially',\n",
              " 'resolved',\n",
              " 'th',\n",
              " '##rom',\n",
              " '##bus',\n",
              " 'producing',\n",
              " 'a',\n",
              " 'maximum',\n",
              " 'ste',\n",
              " '##nosis',\n",
              " 'of',\n",
              " '50',\n",
              " '%',\n",
              " ';',\n",
              " 'distal',\n",
              " '##ly',\n",
              " 'the',\n",
              " 'posterior',\n",
              " 'descending',\n",
              " 'artery',\n",
              " 'had',\n",
              " 'two',\n",
              " '40',\n",
              " '%',\n",
              " 'lesions',\n",
              " '.',\n",
              " 'the',\n",
              " 'patient',\n",
              " 'evolved',\n",
              " 'sat',\n",
              " '##is',\n",
              " '##fa',\n",
              " '##ctor',\n",
              " '##ily',\n",
              " 'without',\n",
              " 'further',\n",
              " 'complications',\n",
              " 'and',\n",
              " 'pain',\n",
              " '-',\n",
              " 'free',\n",
              " '.',\n",
              " 'he',\n",
              " 'was',\n",
              " 'discharged',\n",
              " 'for',\n",
              " 'out',\n",
              " '##patient',\n",
              " 'follow',\n",
              " '-',\n",
              " 'up',\n",
              " '.',\n",
              " '[SEP]']"
            ]
          },
          "metadata": {},
          "execution_count": 42
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "word_ids = inputs.word_ids()\n",
        "print(labels)\n",
        "print(align_labels_with_tokens(labels, word_ids))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "w77KW9-3TD4u",
        "outputId": "eae68b43-ff6a-48cb-a380-930f156c24c6"
      },
      "execution_count": 43,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "tokenized_datasets = raw_datasets.map(\n",
        "    tokenize_and_align_labels,\n",
        "    batched=True,\n",
        "    remove_columns=raw_datasets[\"train\"].column_names,\n",
        ")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 81,
          "referenced_widgets": [
            "d161016f9fea41e6b27eb537c12d0703",
            "f315bcfdc76848cb8e851a2698e0248b",
            "ef152955607540f2a7d38bf9e2207eec",
            "456c36425ac94dc294f8402c07668a51",
            "4d305d32efdf4b639e65e816a7132597",
            "5c3a10b039c344509be9867ca40a8472",
            "6663eac35b7a4043b97edb90a555e3d9",
            "30bce58edba74043abc1a2625c492d4a",
            "d4b6dbbad9c946ed99b6c6e587bfb6da",
            "8976a59e4ea049088f92a37f7547e16e",
            "6a689955d9b3463abaaaa03b62d3cf69",
            "9d65a59161cd401aad05f4a52d51c724",
            "6f244b91a3884eb5b0fbd577ed5d1710",
            "2b6ad660dd1f4c78855433118b9fb61e",
            "101fa9a9581a46d8b1e0951f03796740",
            "7ffe4378bc7b410780780dd51d0705ea",
            "77c422e831944566a6529da37645ef6d",
            "9a88121d0138438980f1c7e4341f480a",
            "52600cdbf4804b148e02724ae4902de5",
            "11f16a0c34e64d6494ac1d2550d18f8f",
            "645616ac236e479c8303a56100d26d51",
            "859e35e323f0407fbdea9eb7ae953742"
          ]
        },
        "id": "5BPyKS51TWGK",
        "outputId": "f7ada6c5-8860-40d3-f32b-4b79abe14ae8"
      },
      "execution_count": 44,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/1 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "d161016f9fea41e6b27eb537c12d0703"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/1 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "9d65a59161cd401aad05f4a52d51c724"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
      ],
      "metadata": {
        "id": "_EZvP3kyTfms"
      },
      "execution_count": 45,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
        "batch[\"labels\"]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IkQdm1ODTiWJ",
        "outputId": "5d18c54e-c5aa-4d89-ed8a-e1b7664350f8"
      },
      "execution_count": 46,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[-100,    0,    0,  ...,    0,    0, -100],\n",
              "        [-100,    0,    0,  ...,    0,    0, -100]])"
            ]
          },
          "metadata": {},
          "execution_count": 46
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "id2label = {str(i): label for i, label in enumerate(label_names)}\n",
        "label2id = {v: k for k, v in id2label.items()}"
      ],
      "metadata": {
        "id": "QPom2dyaTkdf"
      },
      "execution_count": 47,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "model = AutoModelForTokenClassification.from_pretrained(    \n",
        "    model_checkpoint\n",
        ")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "LSRqpzXDTnpt",
        "outputId": "f87c31de-e40d-49f8-81a1-53a2a9905083"
      },
      "execution_count": 55,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/config.json\n",
            "Model config DistilBertConfig {\n",
            "  \"_name_or_path\": \"d4data/biomedical-ner-all\",\n",
            "  \"activation\": \"gelu\",\n",
            "  \"architectures\": [\n",
            "    \"DistilBertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_dropout\": 0.1,\n",
            "  \"dim\": 768,\n",
            "  \"dropout\": 0.1,\n",
            "  \"hidden_dim\": 3072,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-Activity\",\n",
            "    \"2\": \"B-Administration\",\n",
            "    \"3\": \"B-Age\",\n",
            "    \"4\": \"B-Area\",\n",
            "    \"5\": \"B-Biological_attribute\",\n",
            "    \"6\": \"B-Biological_structure\",\n",
            "    \"7\": \"B-Clinical_event\",\n",
            "    \"8\": \"B-Color\",\n",
            "    \"9\": \"B-Coreference\",\n",
            "    \"10\": \"B-Date\",\n",
            "    \"11\": \"B-Detailed_description\",\n",
            "    \"12\": \"B-Diagnostic_procedure\",\n",
            "    \"13\": \"B-Disease_disorder\",\n",
            "    \"14\": \"B-Distance\",\n",
            "    \"15\": \"B-Dosage\",\n",
            "    \"16\": \"B-Duration\",\n",
            "    \"17\": \"B-Family_history\",\n",
            "    \"18\": \"B-Frequency\",\n",
            "    \"19\": \"B-Height\",\n",
            "    \"20\": \"B-History\",\n",
            "    \"21\": \"B-Lab_value\",\n",
            "    \"22\": \"B-Mass\",\n",
            "    \"23\": \"B-Medication\",\n",
            "    \"24\": \"B-Non[biological](Detailed_description\",\n",
            "    \"25\": \"B-Nonbiological_location\",\n",
            "    \"26\": \"B-Occupation\",\n",
            "    \"27\": \"B-Other_entity\",\n",
            "    \"28\": \"B-Other_event\",\n",
            "    \"29\": \"B-Outcome\",\n",
            "    \"30\": \"B-Personal_[back](Biological_structure\",\n",
            "    \"31\": \"B-Personal_background\",\n",
            "    \"32\": \"B-Qualitative_concept\",\n",
            "    \"33\": \"B-Quantitative_concept\",\n",
            "    \"34\": \"B-Severity\",\n",
            "    \"35\": \"B-Sex\",\n",
            "    \"36\": \"B-Shape\",\n",
            "    \"37\": \"B-Sign_symptom\",\n",
            "    \"38\": \"B-Subject\",\n",
            "    \"39\": \"B-Texture\",\n",
            "    \"40\": \"B-Therapeutic_procedure\",\n",
            "    \"41\": \"B-Time\",\n",
            "    \"42\": \"B-Volume\",\n",
            "    \"43\": \"B-Weight\",\n",
            "    \"44\": \"I-Activity\",\n",
            "    \"45\": \"I-Administration\",\n",
            "    \"46\": \"I-Age\",\n",
            "    \"47\": \"I-Area\",\n",
            "    \"48\": \"I-Biological_attribute\",\n",
            "    \"49\": \"I-Biological_structure\",\n",
            "    \"50\": \"I-Clinical_event\",\n",
            "    \"51\": \"I-Color\",\n",
            "    \"52\": \"I-Coreference\",\n",
            "    \"53\": \"I-Date\",\n",
            "    \"54\": \"I-Detailed_description\",\n",
            "    \"55\": \"I-Diagnostic_procedure\",\n",
            "    \"56\": \"I-Disease_disorder\",\n",
            "    \"57\": \"I-Distance\",\n",
            "    \"58\": \"I-Dosage\",\n",
            "    \"59\": \"I-Duration\",\n",
            "    \"60\": \"I-Family_history\",\n",
            "    \"61\": \"I-Frequency\",\n",
            "    \"62\": \"I-Height\",\n",
            "    \"63\": \"I-History\",\n",
            "    \"64\": \"I-Lab_value\",\n",
            "    \"65\": \"I-Mass\",\n",
            "    \"66\": \"I-Medication\",\n",
            "    \"67\": \"I-Nonbiological_location\",\n",
            "    \"68\": \"I-Occupation\",\n",
            "    \"69\": \"I-Other_entity\",\n",
            "    \"70\": \"I-Other_event\",\n",
            "    \"71\": \"I-Outcome\",\n",
            "    \"72\": \"I-Personal_background\",\n",
            "    \"73\": \"I-Qualitative_concept\",\n",
            "    \"74\": \"I-Quantitative_concept\",\n",
            "    \"75\": \"I-Severity\",\n",
            "    \"76\": \"I-Shape\",\n",
            "    \"77\": \"I-Sign_symptom\",\n",
            "    \"78\": \"I-Subject\",\n",
            "    \"79\": \"I-Texture\",\n",
            "    \"80\": \"I-Therapeutic_procedure\",\n",
            "    \"81\": \"I-Time\",\n",
            "    \"82\": \"I-Volume\",\n",
            "    \"83\": \"I-Weight\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"label2id\": {\n",
            "    \"B-Activity\": 1,\n",
            "    \"B-Administration\": 2,\n",
            "    \"B-Age\": 3,\n",
            "    \"B-Area\": 4,\n",
            "    \"B-Biological_attribute\": 5,\n",
            "    \"B-Biological_structure\": 6,\n",
            "    \"B-Clinical_event\": 7,\n",
            "    \"B-Color\": 8,\n",
            "    \"B-Coreference\": 9,\n",
            "    \"B-Date\": 10,\n",
            "    \"B-Detailed_description\": 11,\n",
            "    \"B-Diagnostic_procedure\": 12,\n",
            "    \"B-Disease_disorder\": 13,\n",
            "    \"B-Distance\": 14,\n",
            "    \"B-Dosage\": 15,\n",
            "    \"B-Duration\": 16,\n",
            "    \"B-Family_history\": 17,\n",
            "    \"B-Frequency\": 18,\n",
            "    \"B-Height\": 19,\n",
            "    \"B-History\": 20,\n",
            "    \"B-Lab_value\": 21,\n",
            "    \"B-Mass\": 22,\n",
            "    \"B-Medication\": 23,\n",
            "    \"B-Non[biological](Detailed_description\": 24,\n",
            "    \"B-Nonbiological_location\": 25,\n",
            "    \"B-Occupation\": 26,\n",
            "    \"B-Other_entity\": 27,\n",
            "    \"B-Other_event\": 28,\n",
            "    \"B-Outcome\": 29,\n",
            "    \"B-Personal_[back](Biological_structure\": 30,\n",
            "    \"B-Personal_background\": 31,\n",
            "    \"B-Qualitative_concept\": 32,\n",
            "    \"B-Quantitative_concept\": 33,\n",
            "    \"B-Severity\": 34,\n",
            "    \"B-Sex\": 35,\n",
            "    \"B-Shape\": 36,\n",
            "    \"B-Sign_symptom\": 37,\n",
            "    \"B-Subject\": 38,\n",
            "    \"B-Texture\": 39,\n",
            "    \"B-Therapeutic_procedure\": 40,\n",
            "    \"B-Time\": 41,\n",
            "    \"B-Volume\": 42,\n",
            "    \"B-Weight\": 43,\n",
            "    \"I-Activity\": 44,\n",
            "    \"I-Administration\": 45,\n",
            "    \"I-Age\": 46,\n",
            "    \"I-Area\": 47,\n",
            "    \"I-Biological_attribute\": 48,\n",
            "    \"I-Biological_structure\": 49,\n",
            "    \"I-Clinical_event\": 50,\n",
            "    \"I-Color\": 51,\n",
            "    \"I-Coreference\": 52,\n",
            "    \"I-Date\": 53,\n",
            "    \"I-Detailed_description\": 54,\n",
            "    \"I-Diagnostic_procedure\": 55,\n",
            "    \"I-Disease_disorder\": 56,\n",
            "    \"I-Distance\": 57,\n",
            "    \"I-Dosage\": 58,\n",
            "    \"I-Duration\": 59,\n",
            "    \"I-Family_history\": 60,\n",
            "    \"I-Frequency\": 61,\n",
            "    \"I-Height\": 62,\n",
            "    \"I-History\": 63,\n",
            "    \"I-Lab_value\": 64,\n",
            "    \"I-Mass\": 65,\n",
            "    \"I-Medication\": 66,\n",
            "    \"I-Nonbiological_location\": 67,\n",
            "    \"I-Occupation\": 68,\n",
            "    \"I-Other_entity\": 69,\n",
            "    \"I-Other_event\": 70,\n",
            "    \"I-Outcome\": 71,\n",
            "    \"I-Personal_background\": 72,\n",
            "    \"I-Qualitative_concept\": 73,\n",
            "    \"I-Quantitative_concept\": 74,\n",
            "    \"I-Severity\": 75,\n",
            "    \"I-Shape\": 76,\n",
            "    \"I-Sign_symptom\": 77,\n",
            "    \"I-Subject\": 78,\n",
            "    \"I-Texture\": 79,\n",
            "    \"I-Therapeutic_procedure\": 80,\n",
            "    \"I-Time\": 81,\n",
            "    \"I-Volume\": 82,\n",
            "    \"I-Weight\": 83,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"distilbert\",\n",
            "  \"n_heads\": 12,\n",
            "  \"n_layers\": 6,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"qa_dropout\": 0.1,\n",
            "  \"seq_classif_dropout\": 0.2,\n",
            "  \"sinusoidal_pos_embds\": false,\n",
            "  \"tie_weights_\": true,\n",
            "  \"torch_dtype\": \"float32\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"vocab_size\": 30522\n",
            "}\n",
            "\n",
            "loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/pytorch_model.bin\n",
            "All model checkpoint weights were used when initializing DistilBertForTokenClassification.\n",
            "\n",
            "All the weights of DistilBertForTokenClassification were initialized from the model checkpoint at d4data/biomedical-ner-all.\n",
            "If your task is similar to the task the model of the checkpoint was trained on, you can already use DistilBertForTokenClassification for predictions without further training.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "args = TrainingArguments(\n",
        "    \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
        "    evaluation_strategy = IntervalStrategy.STEPS,\n",
        "    eval_steps = 50,\n",
        "    learning_rate=5e-5,\n",
        "    num_train_epochs=50,\n",
        "    weight_decay=0.01,\n",
        "    metric_for_best_model = 'f1',\n",
        "   load_best_model_at_end=True\n",
        ")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Z4DTxHPoTsGs",
        "outputId": "9b8c9d65-4d5a-4fb6-b91e-d41efb1ac68c"
      },
      "execution_count": 56,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "PyTorch: setting up devices\n",
            "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    args=args,\n",
        "    train_dataset=tokenized_datasets[\"train\"],\n",
        "    eval_dataset=tokenized_datasets[\"validation\"],\n",
        "    data_collator=data_collator,\n",
        "    compute_metrics=compute_metrics,\n",
        "    tokenizer=tokenizer,\n",
        "    callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
        ")\n",
        "trainer.train()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "rVfzrBZMTv6x",
        "outputId": "3cdd94b4-ab62-40dc-e180-1f7a22a4ef1b"
      },
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "***** Running training *****\n",
            "  Num examples = 555\n",
            "  Num Epochs = 50\n",
            "  Instantaneous batch size per device = 8\n",
            "  Total train batch size (w. parallel, distributed & accumulation) = 8\n",
            "  Gradient Accumulation steps = 1\n",
            "  Total optimization steps = 3500\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='900' max='3500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [ 900/3500 02:39 < 07:41, 5.63 it/s, Epoch 12/50]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Step</th>\n",
              "      <th>Training Loss</th>\n",
              "      <th>Validation Loss</th>\n",
              "      <th>Precision</th>\n",
              "      <th>Recall</th>\n",
              "      <th>F1</th>\n",
              "      <th>Accuracy</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>50</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.162818</td>\n",
              "      <td>0.270997</td>\n",
              "      <td>0.309661</td>\n",
              "      <td>0.289042</td>\n",
              "      <td>0.937595</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>100</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.166674</td>\n",
              "      <td>0.348107</td>\n",
              "      <td>0.517594</td>\n",
              "      <td>0.416259</td>\n",
              "      <td>0.938953</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>150</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.165462</td>\n",
              "      <td>0.376270</td>\n",
              "      <td>0.521433</td>\n",
              "      <td>0.437115</td>\n",
              "      <td>0.938915</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>200</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.146949</td>\n",
              "      <td>0.404802</td>\n",
              "      <td>0.463852</td>\n",
              "      <td>0.432320</td>\n",
              "      <td>0.945101</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>250</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.165257</td>\n",
              "      <td>0.440503</td>\n",
              "      <td>0.492642</td>\n",
              "      <td>0.465116</td>\n",
              "      <td>0.944807</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>300</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.187354</td>\n",
              "      <td>0.421611</td>\n",
              "      <td>0.431862</td>\n",
              "      <td>0.426675</td>\n",
              "      <td>0.945729</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>350</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.203230</td>\n",
              "      <td>0.445104</td>\n",
              "      <td>0.479846</td>\n",
              "      <td>0.461823</td>\n",
              "      <td>0.945396</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>400</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.215578</td>\n",
              "      <td>0.419140</td>\n",
              "      <td>0.523992</td>\n",
              "      <td>0.465738</td>\n",
              "      <td>0.944269</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>450</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.228786</td>\n",
              "      <td>0.456535</td>\n",
              "      <td>0.467051</td>\n",
              "      <td>0.461733</td>\n",
              "      <td>0.945178</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>500</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.261185</td>\n",
              "      <td>0.427002</td>\n",
              "      <td>0.522073</td>\n",
              "      <td>0.469775</td>\n",
              "      <td>0.943641</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>550</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.265154</td>\n",
              "      <td>0.424972</td>\n",
              "      <td>0.485605</td>\n",
              "      <td>0.453270</td>\n",
              "      <td>0.945165</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>600</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.269958</td>\n",
              "      <td>0.383793</td>\n",
              "      <td>0.551504</td>\n",
              "      <td>0.452612</td>\n",
              "      <td>0.941015</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>650</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.283041</td>\n",
              "      <td>0.449687</td>\n",
              "      <td>0.506078</td>\n",
              "      <td>0.476219</td>\n",
              "      <td>0.945652</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>700</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.296066</td>\n",
              "      <td>0.423529</td>\n",
              "      <td>0.552783</td>\n",
              "      <td>0.479600</td>\n",
              "      <td>0.941335</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>750</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.282410</td>\n",
              "      <td>0.444382</td>\n",
              "      <td>0.503519</td>\n",
              "      <td>0.472106</td>\n",
              "      <td>0.944884</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>800</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.295058</td>\n",
              "      <td>0.400769</td>\n",
              "      <td>0.533589</td>\n",
              "      <td>0.457739</td>\n",
              "      <td>0.943615</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>850</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.294637</td>\n",
              "      <td>0.432018</td>\n",
              "      <td>0.504159</td>\n",
              "      <td>0.465309</td>\n",
              "      <td>0.944320</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>900</td>\n",
              "      <td>0.103800</td>\n",
              "      <td>0.296676</td>\n",
              "      <td>0.411402</td>\n",
              "      <td>0.512476</td>\n",
              "      <td>0.456410</td>\n",
              "      <td>0.942796</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "\n",
            "\n",
            "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
            "\n",
            "\n",
            "Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500 (score: 0.4697754749568221).\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "TrainOutput(global_step=900, training_loss=0.06402364306979709, metrics={'train_runtime': 159.6722, 'train_samples_per_second': 173.794, 'train_steps_per_second': 21.92, 'total_flos': 932785215873192.0, 'train_loss': 0.06402364306979709, 'epoch': 12.86})"
            ]
          },
          "metadata": {},
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "trainer.save_model('model/distilbert-base-uncased-all-tokens')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "g_r4zDTuT6n0",
        "outputId": "d9cf6e0f-4d72-4dc3-bdaf-fe162acfde8f"
      },
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Saving model checkpoint to model/distilbert-base-uncased-all-tokens\n",
            "Configuration saved in model/distilbert-base-uncased-all-tokens/config.json\n",
            "Model weights saved in model/distilbert-base-uncased-all-tokens/pytorch_model.bin\n",
            "tokenizer config file saved in model/distilbert-base-uncased-all-tokens/tokenizer_config.json\n",
            "Special tokens file saved in model/distilbert-base-uncased-all-tokens/special_tokens_map.json\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Model 1 - d4data/biomedical-ner-all\n",
        "\n",
        "### Sentence Based Modelling"
      ],
      "metadata": {
        "id": "U9LTC6H7Ut3-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
      ],
      "metadata": {
        "id": "YntZAcxIUpmM"
      },
      "execution_count": 59,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": 60,
      "metadata": {
        "id": "c0p6sqVqVDhK"
      },
      "outputs": [],
      "source": [
        "dataset = Dataset.from_dict(dic)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 61,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "54fb3085-2261-496a-f6d0-60f93f654540",
        "id": "dKokCRtaVDhK"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 11668\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 61
        }
      ],
      "source": [
        "dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 62,
      "metadata": {
        "id": "yaiKWzNRVDhK"
      },
      "outputs": [],
      "source": [
        "#For training, validation, and test partitions\n",
        "\"\"\"\n",
        "#Train, val, test partitions\n",
        "train_test = dataset.train_test_split()\n",
        "test_val = train_test['test'].train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': test_val['train'],\n",
        "    'test': test_val['test']\n",
        "    })\n",
        "\"\"\"\n",
        "\n",
        "#Just for training and validation partitions\n",
        "train_test = dataset.train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': train_test['test']\n",
        "    })"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 63,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "3f04c836-fa6c-4f6b-9be8-1a3b77910f74",
        "id": "bJryyZX2VDhL"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "DatasetDict({\n",
              "    train: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 8751\n",
              "    })\n",
              "    validation: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 2917\n",
              "    })\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 63
        }
      ],
      "source": [
        "raw_datasets"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 64,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "7cb91446-41f5-4257-9d36-aac32e1a2d8b",
        "id": "p9q9WmGpVDhL"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
            ]
          },
          "metadata": {},
          "execution_count": 64
        }
      ],
      "source": [
        "raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"chunk_tags\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 65,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "0dd389dd-cb44-4235-c327-f58e5d8f2f1a",
        "id": "K7Sip5njVDhL"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 8751\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 65
        }
      ],
      "source": [
        "raw_datasets['train']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 66,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "fc667def-0a5f-481d-85c3-8ff5f80a5eb5",
        "id": "BI420tEFVDhL"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['O', 'B', 'I']"
            ]
          },
          "metadata": {},
          "execution_count": 66
        }
      ],
      "source": [
        "label_names = ['O','B','I']\n",
        "label_names"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 67,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d11245fb-f724-440f-954e-064b90d32579",
        "id": "fvbDPubIVDhL"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Since 2006 she had tried several times to lose weight , without success . \n",
            "O     O    O   O   O     O       O     O  O    O      O O       O       O \n"
          ]
        }
      ],
      "source": [
        "words = raw_datasets[\"train\"][0][\"tokens\"]\n",
        "labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
        "#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
        "line1 = \"\"\n",
        "line2 = \"\"\n",
        "for word, label in zip(words, labels):\n",
        "    full_label = label_names[label]\n",
        "    max_length = max(len(word), len(full_label))\n",
        "    line1 += word + \" \" * (max_length - len(word) + 1)\n",
        "    line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
        "\n",
        "print(line1)\n",
        "print(line2)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 68,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "9d061e6c-4cd9-472d-b873-814d709efb63",
        "id": "LXmlD43QVDhL"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/vocab.txt\n",
            "loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/tokenizer.json\n",
            "loading file added_tokens.json from cache at None\n",
            "loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/special_tokens_map.json\n",
            "loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/tokenizer_config.json\n"
          ]
        }
      ],
      "source": [
        "model_checkpoint = \"d4data/biomedical-ner-all\"\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 69,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e11cebbd-2fd8-4db3-f014-d52f80ae104c",
        "id": "rUn3zUd9VDhM"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 69
        }
      ],
      "source": [
        "tokenizer.is_fast"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 70,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "0196fde7-250b-4486-f63f-2dcea8848e5d",
        "id": "LXxYSaDLVDhM"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['[CLS]',\n",
              " 'since',\n",
              " '2006',\n",
              " 'she',\n",
              " 'had',\n",
              " 'tried',\n",
              " 'several',\n",
              " 'times',\n",
              " 'to',\n",
              " 'lose',\n",
              " 'weight',\n",
              " ',',\n",
              " 'without',\n",
              " 'success',\n",
              " '.',\n",
              " '[SEP]']"
            ]
          },
          "metadata": {},
          "execution_count": 70
        }
      ],
      "source": [
        "inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
        "inputs.tokens()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 71,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "48b11a85-7ca4-4589-dcba-21833e5b6c55",
        "id": "-bG1VI2NVDhM"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
          ]
        }
      ],
      "source": [
        "labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "word_ids = inputs.word_ids()\n",
        "print(labels)\n",
        "print(align_labels_with_tokens(labels, word_ids))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 72,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 81,
          "referenced_widgets": [
            "40c2b37fa07f44648cecc9b7e406e7e2",
            "69ed5fe8ed6046acb4202689c065f858",
            "bbd9cf7a77aa48fda3a648583ed02b08",
            "ebe8b2b35e884fd28bb42eacf01ff07c",
            "ddb14bc1d5d4437a9ee4a895846e7d29",
            "669c24c6309f46cbbdcd0c764143e74f",
            "d1e4665beafa4bbeb25d0e9e8447a5a9",
            "09217bdc1e2145eb84cc97207595e6f0",
            "b294af01ac5f483dacbe2e1c40fdf223",
            "c1e27e9184204d618ce59b97f7302335",
            "2e63f2af443d448aaaddf81127def048",
            "9239cc2fd1d94d86986b7f395de70fca",
            "f1e8d31b67db4089ab1b036bda341617",
            "33be40ebcab54ff68855f1145cf5e1d6",
            "d96c111f09d74a0c9816328f88d9e45b",
            "3907dc2aaa484877aee9beab8a6888d4",
            "aaacfb0f3bd1427ea44ec84c28a2aaf7",
            "cb92e843491142e8a2a4008223a90d02",
            "52169f264141463e94a7761a4ffb3f7a",
            "e509b790873740b59aa2f52875ca2038",
            "a395318bce7348d78ca83a308552f042",
            "a2bb171f700743559e1d2c472c8289ef"
          ]
        },
        "outputId": "aad10f99-e5fa-423c-f259-a12310b18d1d",
        "id": "0OVZq1BtVDhM"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/9 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "40c2b37fa07f44648cecc9b7e406e7e2"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/3 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "9239cc2fd1d94d86986b7f395de70fca"
            }
          },
          "metadata": {}
        }
      ],
      "source": [
        "tokenized_datasets = raw_datasets.map(\n",
        "    tokenize_and_align_labels,\n",
        "    batched=True,\n",
        "    remove_columns=raw_datasets[\"train\"].column_names,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 73,
      "metadata": {
        "id": "rdl_EpWuVDhN"
      },
      "outputs": [],
      "source": [
        "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 74,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d2768d3a-b399-4236-a9c4-cfff18f31d1c",
        "id": "gY7K46A2VDhN"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[-100,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
              "            0,    0,    0, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n",
              "         -100, -100, -100, -100, -100, -100, -100, -100, -100, -100],\n",
              "        [-100,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
              "            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
              "            1,    2,    2,    2,    2,    2,    2,    2,    0, -100]])"
            ]
          },
          "metadata": {},
          "execution_count": 74
        }
      ],
      "source": [
        "batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
        "batch[\"labels\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 75,
      "metadata": {
        "id": "PNC5_yuKVDhN"
      },
      "outputs": [],
      "source": [
        "id2label = {str(i): label for i, label in enumerate(label_names)}\n",
        "label2id = {v: k for k, v in id2label.items()}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 76,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ba1aa394-ff7a-471e-a8dc-d35755f501ed",
        "id": "AbRkJHvbVDhN"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/config.json\n",
            "Model config DistilBertConfig {\n",
            "  \"_name_or_path\": \"d4data/biomedical-ner-all\",\n",
            "  \"activation\": \"gelu\",\n",
            "  \"architectures\": [\n",
            "    \"DistilBertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_dropout\": 0.1,\n",
            "  \"dim\": 768,\n",
            "  \"dropout\": 0.1,\n",
            "  \"hidden_dim\": 3072,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-Activity\",\n",
            "    \"2\": \"B-Administration\",\n",
            "    \"3\": \"B-Age\",\n",
            "    \"4\": \"B-Area\",\n",
            "    \"5\": \"B-Biological_attribute\",\n",
            "    \"6\": \"B-Biological_structure\",\n",
            "    \"7\": \"B-Clinical_event\",\n",
            "    \"8\": \"B-Color\",\n",
            "    \"9\": \"B-Coreference\",\n",
            "    \"10\": \"B-Date\",\n",
            "    \"11\": \"B-Detailed_description\",\n",
            "    \"12\": \"B-Diagnostic_procedure\",\n",
            "    \"13\": \"B-Disease_disorder\",\n",
            "    \"14\": \"B-Distance\",\n",
            "    \"15\": \"B-Dosage\",\n",
            "    \"16\": \"B-Duration\",\n",
            "    \"17\": \"B-Family_history\",\n",
            "    \"18\": \"B-Frequency\",\n",
            "    \"19\": \"B-Height\",\n",
            "    \"20\": \"B-History\",\n",
            "    \"21\": \"B-Lab_value\",\n",
            "    \"22\": \"B-Mass\",\n",
            "    \"23\": \"B-Medication\",\n",
            "    \"24\": \"B-Non[biological](Detailed_description\",\n",
            "    \"25\": \"B-Nonbiological_location\",\n",
            "    \"26\": \"B-Occupation\",\n",
            "    \"27\": \"B-Other_entity\",\n",
            "    \"28\": \"B-Other_event\",\n",
            "    \"29\": \"B-Outcome\",\n",
            "    \"30\": \"B-Personal_[back](Biological_structure\",\n",
            "    \"31\": \"B-Personal_background\",\n",
            "    \"32\": \"B-Qualitative_concept\",\n",
            "    \"33\": \"B-Quantitative_concept\",\n",
            "    \"34\": \"B-Severity\",\n",
            "    \"35\": \"B-Sex\",\n",
            "    \"36\": \"B-Shape\",\n",
            "    \"37\": \"B-Sign_symptom\",\n",
            "    \"38\": \"B-Subject\",\n",
            "    \"39\": \"B-Texture\",\n",
            "    \"40\": \"B-Therapeutic_procedure\",\n",
            "    \"41\": \"B-Time\",\n",
            "    \"42\": \"B-Volume\",\n",
            "    \"43\": \"B-Weight\",\n",
            "    \"44\": \"I-Activity\",\n",
            "    \"45\": \"I-Administration\",\n",
            "    \"46\": \"I-Age\",\n",
            "    \"47\": \"I-Area\",\n",
            "    \"48\": \"I-Biological_attribute\",\n",
            "    \"49\": \"I-Biological_structure\",\n",
            "    \"50\": \"I-Clinical_event\",\n",
            "    \"51\": \"I-Color\",\n",
            "    \"52\": \"I-Coreference\",\n",
            "    \"53\": \"I-Date\",\n",
            "    \"54\": \"I-Detailed_description\",\n",
            "    \"55\": \"I-Diagnostic_procedure\",\n",
            "    \"56\": \"I-Disease_disorder\",\n",
            "    \"57\": \"I-Distance\",\n",
            "    \"58\": \"I-Dosage\",\n",
            "    \"59\": \"I-Duration\",\n",
            "    \"60\": \"I-Family_history\",\n",
            "    \"61\": \"I-Frequency\",\n",
            "    \"62\": \"I-Height\",\n",
            "    \"63\": \"I-History\",\n",
            "    \"64\": \"I-Lab_value\",\n",
            "    \"65\": \"I-Mass\",\n",
            "    \"66\": \"I-Medication\",\n",
            "    \"67\": \"I-Nonbiological_location\",\n",
            "    \"68\": \"I-Occupation\",\n",
            "    \"69\": \"I-Other_entity\",\n",
            "    \"70\": \"I-Other_event\",\n",
            "    \"71\": \"I-Outcome\",\n",
            "    \"72\": \"I-Personal_background\",\n",
            "    \"73\": \"I-Qualitative_concept\",\n",
            "    \"74\": \"I-Quantitative_concept\",\n",
            "    \"75\": \"I-Severity\",\n",
            "    \"76\": \"I-Shape\",\n",
            "    \"77\": \"I-Sign_symptom\",\n",
            "    \"78\": \"I-Subject\",\n",
            "    \"79\": \"I-Texture\",\n",
            "    \"80\": \"I-Therapeutic_procedure\",\n",
            "    \"81\": \"I-Time\",\n",
            "    \"82\": \"I-Volume\",\n",
            "    \"83\": \"I-Weight\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"label2id\": {\n",
            "    \"B-Activity\": 1,\n",
            "    \"B-Administration\": 2,\n",
            "    \"B-Age\": 3,\n",
            "    \"B-Area\": 4,\n",
            "    \"B-Biological_attribute\": 5,\n",
            "    \"B-Biological_structure\": 6,\n",
            "    \"B-Clinical_event\": 7,\n",
            "    \"B-Color\": 8,\n",
            "    \"B-Coreference\": 9,\n",
            "    \"B-Date\": 10,\n",
            "    \"B-Detailed_description\": 11,\n",
            "    \"B-Diagnostic_procedure\": 12,\n",
            "    \"B-Disease_disorder\": 13,\n",
            "    \"B-Distance\": 14,\n",
            "    \"B-Dosage\": 15,\n",
            "    \"B-Duration\": 16,\n",
            "    \"B-Family_history\": 17,\n",
            "    \"B-Frequency\": 18,\n",
            "    \"B-Height\": 19,\n",
            "    \"B-History\": 20,\n",
            "    \"B-Lab_value\": 21,\n",
            "    \"B-Mass\": 22,\n",
            "    \"B-Medication\": 23,\n",
            "    \"B-Non[biological](Detailed_description\": 24,\n",
            "    \"B-Nonbiological_location\": 25,\n",
            "    \"B-Occupation\": 26,\n",
            "    \"B-Other_entity\": 27,\n",
            "    \"B-Other_event\": 28,\n",
            "    \"B-Outcome\": 29,\n",
            "    \"B-Personal_[back](Biological_structure\": 30,\n",
            "    \"B-Personal_background\": 31,\n",
            "    \"B-Qualitative_concept\": 32,\n",
            "    \"B-Quantitative_concept\": 33,\n",
            "    \"B-Severity\": 34,\n",
            "    \"B-Sex\": 35,\n",
            "    \"B-Shape\": 36,\n",
            "    \"B-Sign_symptom\": 37,\n",
            "    \"B-Subject\": 38,\n",
            "    \"B-Texture\": 39,\n",
            "    \"B-Therapeutic_procedure\": 40,\n",
            "    \"B-Time\": 41,\n",
            "    \"B-Volume\": 42,\n",
            "    \"B-Weight\": 43,\n",
            "    \"I-Activity\": 44,\n",
            "    \"I-Administration\": 45,\n",
            "    \"I-Age\": 46,\n",
            "    \"I-Area\": 47,\n",
            "    \"I-Biological_attribute\": 48,\n",
            "    \"I-Biological_structure\": 49,\n",
            "    \"I-Clinical_event\": 50,\n",
            "    \"I-Color\": 51,\n",
            "    \"I-Coreference\": 52,\n",
            "    \"I-Date\": 53,\n",
            "    \"I-Detailed_description\": 54,\n",
            "    \"I-Diagnostic_procedure\": 55,\n",
            "    \"I-Disease_disorder\": 56,\n",
            "    \"I-Distance\": 57,\n",
            "    \"I-Dosage\": 58,\n",
            "    \"I-Duration\": 59,\n",
            "    \"I-Family_history\": 60,\n",
            "    \"I-Frequency\": 61,\n",
            "    \"I-Height\": 62,\n",
            "    \"I-History\": 63,\n",
            "    \"I-Lab_value\": 64,\n",
            "    \"I-Mass\": 65,\n",
            "    \"I-Medication\": 66,\n",
            "    \"I-Nonbiological_location\": 67,\n",
            "    \"I-Occupation\": 68,\n",
            "    \"I-Other_entity\": 69,\n",
            "    \"I-Other_event\": 70,\n",
            "    \"I-Outcome\": 71,\n",
            "    \"I-Personal_background\": 72,\n",
            "    \"I-Qualitative_concept\": 73,\n",
            "    \"I-Quantitative_concept\": 74,\n",
            "    \"I-Severity\": 75,\n",
            "    \"I-Shape\": 76,\n",
            "    \"I-Sign_symptom\": 77,\n",
            "    \"I-Subject\": 78,\n",
            "    \"I-Texture\": 79,\n",
            "    \"I-Therapeutic_procedure\": 80,\n",
            "    \"I-Time\": 81,\n",
            "    \"I-Volume\": 82,\n",
            "    \"I-Weight\": 83,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"distilbert\",\n",
            "  \"n_heads\": 12,\n",
            "  \"n_layers\": 6,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"qa_dropout\": 0.1,\n",
            "  \"seq_classif_dropout\": 0.2,\n",
            "  \"sinusoidal_pos_embds\": false,\n",
            "  \"tie_weights_\": true,\n",
            "  \"torch_dtype\": \"float32\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"vocab_size\": 30522\n",
            "}\n",
            "\n",
            "loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/pytorch_model.bin\n",
            "All model checkpoint weights were used when initializing DistilBertForTokenClassification.\n",
            "\n",
            "All the weights of DistilBertForTokenClassification were initialized from the model checkpoint at d4data/biomedical-ner-all.\n",
            "If your task is similar to the task the model of the checkpoint was trained on, you can already use DistilBertForTokenClassification for predictions without further training.\n"
          ]
        }
      ],
      "source": [
        "model = AutoModelForTokenClassification.from_pretrained(    \n",
        "    model_checkpoint\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 77,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "MQILaROUVDhN",
        "outputId": "a548d6da-abe4-417a-99eb-f65d2c5b3273"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "PyTorch: setting up devices\n",
            "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
          ]
        }
      ],
      "source": [
        "args = TrainingArguments(\n",
        "    \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
        "    evaluation_strategy = IntervalStrategy.STEPS,\n",
        "    eval_steps = 50,\n",
        "    learning_rate=5e-5,\n",
        "    num_train_epochs=50,\n",
        "    weight_decay=0.01,\n",
        "    metric_for_best_model = 'f1',\n",
        "   load_best_model_at_end=True\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 78,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "cb0a12c3-0ce4-4d59-d773-f2f31ff05b68",
        "id": "MRJatT86VDhN"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:310: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
            "  FutureWarning,\n",
            "***** Running training *****\n",
            "  Num examples = 8751\n",
            "  Num Epochs = 50\n",
            "  Instantaneous batch size per device = 8\n",
            "  Total train batch size (w. parallel, distributed & accumulation) = 8\n",
            "  Gradient Accumulation steps = 1\n",
            "  Total optimization steps = 54700\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='1650' max='54700' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [ 1650/54700 04:10 < 2:14:23, 6.58 it/s, Epoch 1/50]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Step</th>\n",
              "      <th>Training Loss</th>\n",
              "      <th>Validation Loss</th>\n",
              "      <th>Precision</th>\n",
              "      <th>Recall</th>\n",
              "      <th>F1</th>\n",
              "      <th>Accuracy</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>50</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.220473</td>\n",
              "      <td>0.223507</td>\n",
              "      <td>0.240041</td>\n",
              "      <td>0.231479</td>\n",
              "      <td>0.923262</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>100</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.178907</td>\n",
              "      <td>0.233188</td>\n",
              "      <td>0.249353</td>\n",
              "      <td>0.241000</td>\n",
              "      <td>0.932774</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>150</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.170712</td>\n",
              "      <td>0.209794</td>\n",
              "      <td>0.210554</td>\n",
              "      <td>0.210173</td>\n",
              "      <td>0.934965</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>200</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.161026</td>\n",
              "      <td>0.308015</td>\n",
              "      <td>0.341956</td>\n",
              "      <td>0.324099</td>\n",
              "      <td>0.937756</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>250</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.167800</td>\n",
              "      <td>0.290117</td>\n",
              "      <td>0.282462</td>\n",
              "      <td>0.286239</td>\n",
              "      <td>0.940074</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>300</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.179418</td>\n",
              "      <td>0.388704</td>\n",
              "      <td>0.302638</td>\n",
              "      <td>0.340314</td>\n",
              "      <td>0.939863</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>350</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.160154</td>\n",
              "      <td>0.333333</td>\n",
              "      <td>0.443870</td>\n",
              "      <td>0.380741</td>\n",
              "      <td>0.938609</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>400</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.199905</td>\n",
              "      <td>0.207132</td>\n",
              "      <td>0.204346</td>\n",
              "      <td>0.205729</td>\n",
              "      <td>0.933585</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>450</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.154181</td>\n",
              "      <td>0.349614</td>\n",
              "      <td>0.422142</td>\n",
              "      <td>0.382470</td>\n",
              "      <td>0.940611</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>500</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.154374</td>\n",
              "      <td>0.376731</td>\n",
              "      <td>0.422142</td>\n",
              "      <td>0.398146</td>\n",
              "      <td>0.941137</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>550</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.154931</td>\n",
              "      <td>0.426748</td>\n",
              "      <td>0.432488</td>\n",
              "      <td>0.429599</td>\n",
              "      <td>0.943276</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>600</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.147228</td>\n",
              "      <td>0.402466</td>\n",
              "      <td>0.472840</td>\n",
              "      <td>0.434824</td>\n",
              "      <td>0.942075</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>650</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.154059</td>\n",
              "      <td>0.274133</td>\n",
              "      <td>0.265908</td>\n",
              "      <td>0.269958</td>\n",
              "      <td>0.939326</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>700</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.158030</td>\n",
              "      <td>0.389341</td>\n",
              "      <td>0.544232</td>\n",
              "      <td>0.453937</td>\n",
              "      <td>0.940095</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>750</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.137712</td>\n",
              "      <td>0.373972</td>\n",
              "      <td>0.399897</td>\n",
              "      <td>0.386500</td>\n",
              "      <td>0.946952</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>800</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.145364</td>\n",
              "      <td>0.385280</td>\n",
              "      <td>0.530781</td>\n",
              "      <td>0.446475</td>\n",
              "      <td>0.941485</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>850</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.148491</td>\n",
              "      <td>0.401239</td>\n",
              "      <td>0.535954</td>\n",
              "      <td>0.458915</td>\n",
              "      <td>0.940895</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>900</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.149123</td>\n",
              "      <td>0.374374</td>\n",
              "      <td>0.464046</td>\n",
              "      <td>0.414414</td>\n",
              "      <td>0.946784</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>950</td>\n",
              "      <td>0.213900</td>\n",
              "      <td>0.142598</td>\n",
              "      <td>0.405747</td>\n",
              "      <td>0.365235</td>\n",
              "      <td>0.384427</td>\n",
              "      <td>0.946963</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1000</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.143212</td>\n",
              "      <td>0.449354</td>\n",
              "      <td>0.341956</td>\n",
              "      <td>0.388367</td>\n",
              "      <td>0.945857</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1050</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.146789</td>\n",
              "      <td>0.435653</td>\n",
              "      <td>0.495603</td>\n",
              "      <td>0.463698</td>\n",
              "      <td>0.947300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1100</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.137099</td>\n",
              "      <td>0.421190</td>\n",
              "      <td>0.501811</td>\n",
              "      <td>0.457979</td>\n",
              "      <td>0.948090</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1150</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.144017</td>\n",
              "      <td>0.437966</td>\n",
              "      <td>0.485773</td>\n",
              "      <td>0.460633</td>\n",
              "      <td>0.948532</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1200</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.142823</td>\n",
              "      <td>0.459889</td>\n",
              "      <td>0.344025</td>\n",
              "      <td>0.393608</td>\n",
              "      <td>0.947416</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1250</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.163925</td>\n",
              "      <td>0.380834</td>\n",
              "      <td>0.476979</td>\n",
              "      <td>0.423519</td>\n",
              "      <td>0.947732</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1300</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.153968</td>\n",
              "      <td>0.461087</td>\n",
              "      <td>0.447491</td>\n",
              "      <td>0.454187</td>\n",
              "      <td>0.948711</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1350</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.157536</td>\n",
              "      <td>0.465220</td>\n",
              "      <td>0.432488</td>\n",
              "      <td>0.448257</td>\n",
              "      <td>0.949680</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1400</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.149706</td>\n",
              "      <td>0.421693</td>\n",
              "      <td>0.394206</td>\n",
              "      <td>0.407487</td>\n",
              "      <td>0.947236</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1450</td>\n",
              "      <td>0.153200</td>\n",
              "      <td>0.178025</td>\n",
              "      <td>0.472966</td>\n",
              "      <td>0.448008</td>\n",
              "      <td>0.460149</td>\n",
              "      <td>0.947447</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1500</td>\n",
              "      <td>0.113500</td>\n",
              "      <td>0.146669</td>\n",
              "      <td>0.440809</td>\n",
              "      <td>0.529747</td>\n",
              "      <td>0.481203</td>\n",
              "      <td>0.947689</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1550</td>\n",
              "      <td>0.113500</td>\n",
              "      <td>0.144341</td>\n",
              "      <td>0.480186</td>\n",
              "      <td>0.426280</td>\n",
              "      <td>0.451631</td>\n",
              "      <td>0.950344</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1600</td>\n",
              "      <td>0.113500</td>\n",
              "      <td>0.140071</td>\n",
              "      <td>0.446570</td>\n",
              "      <td>0.495085</td>\n",
              "      <td>0.469578</td>\n",
              "      <td>0.948690</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1650</td>\n",
              "      <td>0.113500</td>\n",
              "      <td>0.175828</td>\n",
              "      <td>0.432393</td>\n",
              "      <td>0.309364</td>\n",
              "      <td>0.360676</td>\n",
              "      <td>0.945825</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "\n",
            "\n",
            "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
            "\n",
            "\n",
            "Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500 (score: 0.48120300751879697).\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "TrainOutput(global_step=1650, training_loss=0.15572210947672527, metrics={'train_runtime': 250.5398, 'train_samples_per_second': 1746.429, 'train_steps_per_second': 218.329, 'total_flos': 244690187133744.0, 'train_loss': 0.15572210947672527, 'epoch': 1.51})"
            ]
          },
          "metadata": {},
          "execution_count": 78
        }
      ],
      "source": [
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    args=args,\n",
        "    train_dataset=tokenized_datasets[\"train\"],\n",
        "    eval_dataset=tokenized_datasets[\"validation\"],\n",
        "    data_collator=data_collator,\n",
        "    compute_metrics=compute_metrics,\n",
        "    tokenizer=tokenizer,\n",
        "    callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
        ")\n",
        "trainer.train()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 79,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "cb8d8bbc-fe8e-4926-d201-8a64dc0edcdb",
        "id": "P2GfgJz0VDhO"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Saving model checkpoint to model/distilbert-base-uncased-sentence\n",
            "Configuration saved in model/distilbert-base-uncased-sentence/config.json\n",
            "Model weights saved in model/distilbert-base-uncased-sentence/pytorch_model.bin\n",
            "tokenizer config file saved in model/distilbert-base-uncased-sentence/tokenizer_config.json\n",
            "Special tokens file saved in model/distilbert-base-uncased-sentence/special_tokens_map.json\n"
          ]
        }
      ],
      "source": [
        "trainer.save_model('model/distilbert-base-uncased-sentence')"
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "lzL_QhxW7Dha"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Model 2 - pucpr/clinicalnerpt-medical\n",
        "\n",
        "Whole document based tokenization"
      ],
      "metadata": {
        "id": "GgGucecD600w"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 80,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "f0fc94c6df4c432f9e1edcfceaf44edd",
            "03f4216f904d4bf6a8e461c50f40378f",
            "76fecb01ad6441a08ad0ddb989a8ab80",
            "783edab3150d40a3bf99ed910cdbaf88",
            "e33897e8fa4841308c55b19352876ab1",
            "909d7d349769402a8c20b02b35eafbd9",
            "6139e3d551044671a79e15e8adf351ab",
            "1c19ec799ccb4e788f34ed8ac37d495f",
            "7a49ac5f18f64b41825378184f8c32ec",
            "76ad2415389f41deb4f2810bca56b753",
            "8c4bb02e55fa48429c8f86dac5cb369e",
            "980374f604ec4970b0afa70d108c864b",
            "baa9e8a9169a45efb8c117fdf4ea45b7",
            "aa248b7d4eae4e5d965a7d04144adacc",
            "311d8199627e4c4f83d02c167b5755f3",
            "a299d926edbb4c51906b1be8f694d074",
            "c76ebf5d6c6c46cd9d14591a47725ae1",
            "178555439d854d30a01cac053adf9079",
            "00c8d81110fe4f4bbcf77be4d20581c3",
            "43c18ef010014cb395f045dd26497fba",
            "3b83d1f429d34e8e8de6ddff555df02d",
            "91a8348ac2194686a9ef075f7d49687d",
            "42dcc74bff5440608a3e9f2fa580cd3c",
            "616c54d0cd534047b93b215e7baf2ba3",
            "846ce38d6fb84279a7419091f2d269b0",
            "1e15050772b54e34b014a98b9710c783",
            "951f476862ea49619100e202a6e742f6",
            "f6e25547ba664cd59128536944a926fd",
            "53185a2afedc41e0a680d5007656b90b",
            "860f8204efae42d6ad2fa4eb9e661810",
            "e61ec283a9c04cc696d17bbe24ccf460",
            "15abc268777a4e3cb5c3c7f430745c1d",
            "e6c1266e8b074bdfafa0db6208743a07",
            "01a12f499b7942cc90f2032a8f3284e9",
            "804bf9b8a2154399a05dd0860f4dfd89",
            "66fca4d7946240c3b08ba51fac82f2ae",
            "6cefb3be5be9488ca033ed9908c6a8f5",
            "2d0f2804db004da0914e4733ce96b749",
            "3d6b93cde5254ce99f19802b7c1146f4",
            "838416bfbee0400299abff324c4825bc",
            "f7584b77d97f4d48aa5b50bae2df49f8",
            "0a5cd97f8a914ff89fd27aed7b38164d",
            "ad4c308ebd574e909d31b161580b9064",
            "74030c98fcb942ed9d9ffc43799113f0"
          ]
        },
        "id": "axyP0XOOKpLg",
        "outputId": "da8d34ae-66c7-459a-99be-56d7a434ec80"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/151 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f0fc94c6df4c432f9e1edcfceaf44edd"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/1.05k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "980374f604ec4970b0afa70d108c864b"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/996k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "42dcc74bff5440608a3e9f2fa580cd3c"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "01a12f499b7942cc90f2032a8f3284e9"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/vocab.txt\n",
            "loading file tokenizer.json from cache at None\n",
            "loading file added_tokens.json from cache at None\n",
            "loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/special_tokens_map.json\n",
            "loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/tokenizer_config.json\n",
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n",
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n"
          ]
        }
      ],
      "source": [
        "model_checkpoint = \"pucpr/clinicalnerpt-medical\"\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 81,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JPTd6vUPKpLg",
        "outputId": "84451d5e-eb3a-4763-dc29-5ffebafc4c3d"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 81
        }
      ],
      "source": [
        "tokenizer.is_fast"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 82,
      "metadata": {
        "id": "q8o2kvS69K2h"
      },
      "outputs": [],
      "source": [
        "dic = {\"tokens\": HCs_tokenized, \"ner_tags\": labels_tokenized} #For the whole clinical case. We used this option for our paper.\n",
        "#dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 83,
      "metadata": {
        "id": "6JXCzYF49K2h"
      },
      "outputs": [],
      "source": [
        "dataset = Dataset.from_dict(dic)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 84,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "f01f435d-ced1-4002-cc23-05e42329c8df",
        "id": "AGI1Hf_E9K2h"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 741\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 84
        }
      ],
      "source": [
        "dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 85,
      "metadata": {
        "id": "CUH94L-29K2h"
      },
      "outputs": [],
      "source": [
        "#For training, validation, and test partitions\n",
        "\"\"\"\n",
        "#Train, val, test partitions\n",
        "train_test = dataset.train_test_split()\n",
        "test_val = train_test['test'].train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': test_val['train'],\n",
        "    'test': test_val['test']\n",
        "    })\n",
        "\"\"\"\n",
        "\n",
        "#Just for training and validation partitions\n",
        "train_test = dataset.train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': train_test['test']\n",
        "    })"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 86,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "654a2798-e4f3-49ce-c59c-747c7538352b",
        "id": "zRYC70NF9K2i"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "DatasetDict({\n",
              "    train: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 555\n",
              "    })\n",
              "    validation: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 186\n",
              "    })\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 86
        }
      ],
      "source": [
        "raw_datasets"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 87,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e1de590a-ea92-4b0b-9da3-c65152d08872",
        "id": "dCZcYkCa9K2i"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 1,\n",
              " 2,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0,\n",
              " 0]"
            ]
          },
          "metadata": {},
          "execution_count": 87
        }
      ],
      "source": [
        "raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"chunk_tags\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 88,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6b0070ab-8b15-4750-e848-a122ca47e050",
        "id": "wWinDHdc9K2i"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 555\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 88
        }
      ],
      "source": [
        "raw_datasets['train']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 89,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ea900262-0ead-4843-da63-d1b736a2ad29",
        "id": "NeEdChWM9K2i"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['O', 'B', 'I']"
            ]
          },
          "metadata": {},
          "execution_count": 89
        }
      ],
      "source": [
        "label_names = ['O','B','I']\n",
        "label_names"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 90,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e264924f-3c29-4f64-d8f6-4441ca35c9b1",
        "id": "FwWqjcpJ9K2i"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "This is a 79 - year - old woman with a history of hypertension , osteoporosis and hysterectomy for myomatosis at the age of 50 . She underwent transurethral resection of infiltrating bladder carcinoma in October 2006 . She subsequently received radiotherapy sessions up to a total of 50 Gy due to persistence of an external tumour mass in the right angle of the bladder , finishing this treatment in June 2007 . In August 2007 she began chemotherapy treatment due to persistence of the bladder lesion and metastases in the spine detected by follow - up CT scan and bone scintigraphy . Her digestive history began in February 2008 when she was admitted for episodes of rectorrhagia , initially scarce and distal , but which soon became more frequent and profuse , accompanied by symptoms of haemodynamic instability and severe anaemia with extensive transfusion requirements . \n",
            " Total colonoscopy was performed , showing only changes typical of actinic proctitis with large friable and bleeding neovascular lesions ; treatment was carried out with argon plasma ( APC ) . The patient 's clinical course was unfavourable : she received consecutive treatment with steroid enemas , 5 - aminosalicylic acid and sucralfate ; in addition , three more therapeutic rectoscopies were performed , applying APC treatment , despite which the episodes of profuse rectorrhagia with haemodynamic instability persisted , requiring transfusion of a total of 21 red blood cell concentrates throughout the patient 's hospitalisation period , despite also receiving oral and intravenous ferrotherapy . \n",
            " In the absence of response to these treatments , the case was discussed with the surgeon for topical treatment with formalin , who , using spinal anaesthesia and anal dilatation , treated the rectal ampulla for 10 minutes with a 10 % formalin solution 200 ml + 300 ml of water ; The patient 's tolerance to the procedure was excellent , it was carried out without any complications and from that moment onwards the patient was completely asymptomatic without new episodes of haemorrhagic externalisation , haemodynamic instability or new transfusion requirements ; she was discharged and a follow - up colonoscopy was proposed after the treatment , but she did not accept . \n",
            " Four months later , she was admitted again for clinical signs of tumour progression , and symptomatic treatment was decided by the Oncology Department , and the patient died , but without recurrence of the rectorrhagia . \n",
            "\n",
            "\n",
            " \n",
            "O    O  O O  O O    O O   O     O    O O       O  O            O B            I   O            O   B          O  O   O   O  O  O O   O         O             O         O  B            B       I         O  O       O    O O   O            O        O            O        O  O  O O     O  O  O  O   O  O           O  O  O        B      I    O  O   O     O     O  O   B       O O         O    O         O  O    O    O O  O      O    O   O     O            O         O   O  O           O  O   B       I      I   O          O  O   O     O        O  O      O O  O  O    O   O    O            O O   O         O       O     O  O        O    O    O   O   O        O   O        O  O            O O         O      O   O      O O   O     O    O      O    O        O   O       O O           O  O        O  O            O           O   B      I       O    O         O           O            O O O     O           O   O         O O       O    O       O       O  B       I         O    O     B       I   I        I           I       O O         O   O       O   O    O     O      O O   O O O   O       O  O        O      O   O            O O   O        O           O         O    O       O      O O O O              O    O   O          O O  O        O O     O    O           O            O    O         O O        O   O         O O       O     O   O        O  O       O            O    O            O           O         O O         O           O  O O     O  O  O   O     O    O            O          O   O       O  O               O      O O       O    O         O    O   O           O            O O O  O   O       O  O        O  O     O          O O   O    O   O         O    O   O       O   O       O         O    O        O O   O O     O      O           O   O    O          O O       O   O      O       O   O  O       O    O O  O O        O        O   O  O O   O  O  O     O O   O       O  O         O  O   O         O   O         O O  O   O       O   O       O   O             O   O    O    O      O       O   O       O   O          O            O       O   O        O  O            O               O O            O           O  O   O           O            O O   O   O          O   O O      O O  O           O   O        O     O   O         O O   O   O   O   O      O O O    O      O     O O   O   O        O     O   O        O     O  B      I           O O   O           O         O   O       O  O   O        O          O O   O   O       O    O O   O       O          O  O   O            O O   \n"
          ]
        }
      ],
      "source": [
        "words = raw_datasets[\"train\"][0][\"tokens\"]\n",
        "labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
        "#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
        "line1 = \"\"\n",
        "line2 = \"\"\n",
        "for word, label in zip(words, labels):\n",
        "    full_label = label_names[label]\n",
        "    max_length = max(len(word), len(full_label))\n",
        "    line1 += word + \" \" * (max_length - len(word) + 1)\n",
        "    line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
        "\n",
        "print(line1)\n",
        "print(line2)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 91,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Bhx4OJLrKpLg",
        "outputId": "08eb850e-d3a5-41e0-9dcf-89f7e7748b02"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Token indices sequence length is longer than the specified maximum sequence length for this model (578 > 512). Running this sequence through the model will result in indexing errors\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['[CLS]',\n",
              " 'this',\n",
              " 'is',\n",
              " 'a',\n",
              " '79',\n",
              " '-',\n",
              " 'year',\n",
              " '-',\n",
              " 'old',\n",
              " 'woman',\n",
              " 'with',\n",
              " 'a',\n",
              " 'history',\n",
              " 'of',\n",
              " 'hy',\n",
              " '##pert',\n",
              " '##ension',\n",
              " ',',\n",
              " 'os',\n",
              " '##te',\n",
              " '##op',\n",
              " '##oros',\n",
              " '##is',\n",
              " 'and',\n",
              " 'hy',\n",
              " '##ster',\n",
              " '##ect',\n",
              " '##omy',\n",
              " 'for',\n",
              " 'my',\n",
              " '##oma',\n",
              " '##tos',\n",
              " '##is',\n",
              " 'at',\n",
              " 'the',\n",
              " 'age',\n",
              " 'of',\n",
              " '50',\n",
              " '.',\n",
              " 'she',\n",
              " 'underwent',\n",
              " 'trans',\n",
              " '##ure',\n",
              " '##th',\n",
              " '##ral',\n",
              " 'res',\n",
              " '##ection',\n",
              " 'of',\n",
              " 'in',\n",
              " '##fil',\n",
              " '##trat',\n",
              " '##ing',\n",
              " 'blad',\n",
              " '##der',\n",
              " 'car',\n",
              " '##cino',\n",
              " '##ma',\n",
              " 'in',\n",
              " 'o',\n",
              " '##cto',\n",
              " '##ber',\n",
              " '2006',\n",
              " '.',\n",
              " 'she',\n",
              " 'subsequently',\n",
              " 'received',\n",
              " 'radio',\n",
              " '##ther',\n",
              " '##ap',\n",
              " '##y',\n",
              " 'sessions',\n",
              " 'up',\n",
              " 'to',\n",
              " 'a',\n",
              " 'total',\n",
              " 'of',\n",
              " '50',\n",
              " 'g',\n",
              " '##y',\n",
              " 'due',\n",
              " 'to',\n",
              " 'pers',\n",
              " '##isten',\n",
              " '##ce',\n",
              " 'of',\n",
              " 'an',\n",
              " 'external',\n",
              " 'tu',\n",
              " '##mou',\n",
              " '##r',\n",
              " 'mass',\n",
              " 'in',\n",
              " 'the',\n",
              " 'right',\n",
              " 'angle',\n",
              " 'of',\n",
              " 'the',\n",
              " 'blad',\n",
              " '##der',\n",
              " ',',\n",
              " 'finishing',\n",
              " 'this',\n",
              " 'treatment',\n",
              " 'in',\n",
              " 'ju',\n",
              " '##ne',\n",
              " '2007',\n",
              " '.',\n",
              " 'in',\n",
              " 'august',\n",
              " '2007',\n",
              " 'she',\n",
              " 'began',\n",
              " 'che',\n",
              " '##mot',\n",
              " '##hera',\n",
              " '##py',\n",
              " 'treatment',\n",
              " 'due',\n",
              " 'to',\n",
              " 'pers',\n",
              " '##isten',\n",
              " '##ce',\n",
              " 'of',\n",
              " 'the',\n",
              " 'blad',\n",
              " '##der',\n",
              " 'les',\n",
              " '##ion',\n",
              " 'and',\n",
              " 'meta',\n",
              " '##stas',\n",
              " '##es',\n",
              " 'in',\n",
              " 'the',\n",
              " 'spin',\n",
              " '##e',\n",
              " 'det',\n",
              " '##ected',\n",
              " 'by',\n",
              " 'follow',\n",
              " '-',\n",
              " 'up',\n",
              " 'c',\n",
              " '##t',\n",
              " 's',\n",
              " '##can',\n",
              " 'and',\n",
              " 'bone',\n",
              " 'sci',\n",
              " '##nti',\n",
              " '##graphy',\n",
              " '.',\n",
              " 'her',\n",
              " 'dig',\n",
              " '##esti',\n",
              " '##ve',\n",
              " 'history',\n",
              " 'began',\n",
              " 'in',\n",
              " 'februar',\n",
              " '##y',\n",
              " '2008',\n",
              " 'when',\n",
              " 'she',\n",
              " 'was',\n",
              " 'admitted',\n",
              " 'for',\n",
              " 'episodes',\n",
              " 'of',\n",
              " 'rector',\n",
              " '##r',\n",
              " '##ha',\n",
              " '##gia',\n",
              " ',',\n",
              " 'initially',\n",
              " 's',\n",
              " '##car',\n",
              " '##ce',\n",
              " 'and',\n",
              " 'dis',\n",
              " '##tal',\n",
              " ',',\n",
              " 'but',\n",
              " 'which',\n",
              " 'soon',\n",
              " 'became',\n",
              " 'more',\n",
              " 'frequent',\n",
              " 'and',\n",
              " 'prof',\n",
              " '##use',\n",
              " ',',\n",
              " 'accompanied',\n",
              " 'by',\n",
              " 'symptoms',\n",
              " 'of',\n",
              " 'hae',\n",
              " '##mo',\n",
              " '##dyn',\n",
              " '##ami',\n",
              " '##c',\n",
              " 'ins',\n",
              " '##tab',\n",
              " '##ility',\n",
              " 'and',\n",
              " 'severe',\n",
              " 'ana',\n",
              " '##emia',\n",
              " 'with',\n",
              " 'extensive',\n",
              " 'trans',\n",
              " '##fus',\n",
              " '##ion',\n",
              " 'requirements',\n",
              " '.',\n",
              " 'total',\n",
              " 'colonos',\n",
              " '##co',\n",
              " '##py',\n",
              " 'was',\n",
              " 'performed',\n",
              " ',',\n",
              " 'showing',\n",
              " 'only',\n",
              " 'changes',\n",
              " 'typical',\n",
              " 'of',\n",
              " 'act',\n",
              " '##ini',\n",
              " '##c',\n",
              " 'pro',\n",
              " '##cti',\n",
              " '##tis',\n",
              " 'with',\n",
              " 'large',\n",
              " 'fri',\n",
              " '##able',\n",
              " 'and',\n",
              " 'ble',\n",
              " '##eding',\n",
              " 'neo',\n",
              " '##vas',\n",
              " '##cular',\n",
              " 'les',\n",
              " '##ions',\n",
              " ';',\n",
              " 'treatment',\n",
              " 'was',\n",
              " 'carried',\n",
              " 'out',\n",
              " 'with',\n",
              " 'ar',\n",
              " '##gon',\n",
              " 'plasma',\n",
              " '(',\n",
              " 'ap',\n",
              " '##c',\n",
              " ')',\n",
              " '.',\n",
              " 'the',\n",
              " 'patient',\n",
              " \"'\",\n",
              " 's',\n",
              " 'clinical',\n",
              " 'course',\n",
              " 'was',\n",
              " 'un',\n",
              " '##fa',\n",
              " '##vour',\n",
              " '##able',\n",
              " ':',\n",
              " 'she',\n",
              " 'received',\n",
              " 'consecutive',\n",
              " 'treatment',\n",
              " 'with',\n",
              " 'ster',\n",
              " '##oid',\n",
              " 'ene',\n",
              " '##mas',\n",
              " ',',\n",
              " '5',\n",
              " '-',\n",
              " 'amino',\n",
              " '##sal',\n",
              " '##icy',\n",
              " '##lic',\n",
              " 'acid',\n",
              " 'and',\n",
              " 'su',\n",
              " '##cra',\n",
              " '##lfa',\n",
              " '##te',\n",
              " ';',\n",
              " 'in',\n",
              " 'addition',\n",
              " ',',\n",
              " 'three',\n",
              " 'more',\n",
              " 'the',\n",
              " '##rap',\n",
              " '##eu',\n",
              " '##tic',\n",
              " 're',\n",
              " '##ctos',\n",
              " '##co',\n",
              " '##pies',\n",
              " 'were',\n",
              " 'performed',\n",
              " ',',\n",
              " 'apply',\n",
              " '##ing',\n",
              " 'ap',\n",
              " '##c',\n",
              " 'treatment',\n",
              " ',',\n",
              " 'despite',\n",
              " 'which',\n",
              " 'the',\n",
              " 'episodes',\n",
              " 'of',\n",
              " 'prof',\n",
              " '##use',\n",
              " 'rector',\n",
              " '##r',\n",
              " '##ha',\n",
              " '##gia',\n",
              " 'with',\n",
              " 'hae',\n",
              " '##mo',\n",
              " '##dyn',\n",
              " '##ami',\n",
              " '##c',\n",
              " 'ins',\n",
              " '##tab',\n",
              " '##ility',\n",
              " 'pers',\n",
              " '##isted',\n",
              " ',',\n",
              " 'requiring',\n",
              " 'trans',\n",
              " '##fus',\n",
              " '##ion',\n",
              " 'of',\n",
              " 'a',\n",
              " 'total',\n",
              " 'of',\n",
              " '21',\n",
              " 'red',\n",
              " 'blood',\n",
              " 'cell',\n",
              " 'con',\n",
              " '##centra',\n",
              " '##tes',\n",
              " 'throughout',\n",
              " 'the',\n",
              " 'patient',\n",
              " \"'\",\n",
              " 's',\n",
              " 'hospital',\n",
              " '##isation',\n",
              " 'period',\n",
              " ',',\n",
              " 'despite',\n",
              " 'also',\n",
              " 'receiving',\n",
              " 'oral',\n",
              " 'and',\n",
              " 'intra',\n",
              " '##veno',\n",
              " '##us',\n",
              " 'ferro',\n",
              " '##ther',\n",
              " '##ap',\n",
              " '##y',\n",
              " '.',\n",
              " 'in',\n",
              " 'the',\n",
              " 'absence',\n",
              " 'of',\n",
              " 'response',\n",
              " 'to',\n",
              " 'these',\n",
              " 'treatment',\n",
              " '##s',\n",
              " ',',\n",
              " 'the',\n",
              " 'case',\n",
              " 'was',\n",
              " 'discussed',\n",
              " 'with',\n",
              " 'the',\n",
              " 'surgeon',\n",
              " 'for',\n",
              " 'topic',\n",
              " '##al',\n",
              " 'treatment',\n",
              " 'with',\n",
              " 'formal',\n",
              " '##in',\n",
              " ',',\n",
              " 'who',\n",
              " ',',\n",
              " 'using',\n",
              " 'spin',\n",
              " '##al',\n",
              " 'ana',\n",
              " '##est',\n",
              " '##hes',\n",
              " '##ia',\n",
              " 'and',\n",
              " 'anal',\n",
              " 'dil',\n",
              " '##ata',\n",
              " '##tion',\n",
              " ',',\n",
              " 'treated',\n",
              " 'the',\n",
              " 'recta',\n",
              " '##l',\n",
              " 'am',\n",
              " '##pul',\n",
              " '##la',\n",
              " 'for',\n",
              " '10',\n",
              " 'minutes',\n",
              " 'with',\n",
              " 'a',\n",
              " '10',\n",
              " '%',\n",
              " 'formal',\n",
              " '##in',\n",
              " 'solution',\n",
              " '200',\n",
              " 'ml',\n",
              " '+',\n",
              " '300',\n",
              " 'ml',\n",
              " 'of',\n",
              " 'water',\n",
              " ';',\n",
              " 'the',\n",
              " 'patient',\n",
              " \"'\",\n",
              " 's',\n",
              " 'tol',\n",
              " '##erance',\n",
              " 'to',\n",
              " 'the',\n",
              " 'procedure',\n",
              " 'was',\n",
              " 'excellent',\n",
              " ',',\n",
              " 'it',\n",
              " 'was',\n",
              " 'carried',\n",
              " 'out',\n",
              " 'without',\n",
              " 'any',\n",
              " 'com',\n",
              " '##plications',\n",
              " 'and',\n",
              " 'from',\n",
              " 'that',\n",
              " 'moment',\n",
              " 'onwards',\n",
              " 'the',\n",
              " 'patient',\n",
              " 'was',\n",
              " 'completely',\n",
              " 'as',\n",
              " '##ym',\n",
              " '##pt',\n",
              " '##oma',\n",
              " '##tic',\n",
              " 'without',\n",
              " 'new',\n",
              " 'episodes',\n",
              " 'of',\n",
              " 'hae',\n",
              " '##mor',\n",
              " '##r',\n",
              " '##ha',\n",
              " '##gic',\n",
              " 'external',\n",
              " '##isation',\n",
              " ',',\n",
              " 'hae',\n",
              " '##mo',\n",
              " '##dyn',\n",
              " '##ami',\n",
              " '##c',\n",
              " 'ins',\n",
              " '##tab',\n",
              " '##ility',\n",
              " 'or',\n",
              " 'new',\n",
              " 'trans',\n",
              " '##fus',\n",
              " '##ion',\n",
              " 'requirements',\n",
              " ';',\n",
              " 'she',\n",
              " 'was',\n",
              " 'disc',\n",
              " '##harge',\n",
              " '##d',\n",
              " 'and',\n",
              " 'a',\n",
              " 'follow',\n",
              " '-',\n",
              " 'up',\n",
              " 'colonos',\n",
              " '##co',\n",
              " '##py',\n",
              " 'was',\n",
              " 'proposed',\n",
              " 'after',\n",
              " 'the',\n",
              " 'treatment',\n",
              " ',',\n",
              " 'but',\n",
              " 'she',\n",
              " 'did',\n",
              " 'not',\n",
              " 'accept',\n",
              " '.',\n",
              " 'four',\n",
              " 'months',\n",
              " 'later',\n",
              " ',',\n",
              " 'she',\n",
              " 'was',\n",
              " 'admitted',\n",
              " 'again',\n",
              " 'for',\n",
              " 'clinical',\n",
              " 'signs',\n",
              " 'of',\n",
              " 'tu',\n",
              " '##mou',\n",
              " '##r',\n",
              " 'progression',\n",
              " ',',\n",
              " 'and',\n",
              " 'sy',\n",
              " '##mpt',\n",
              " '##oma',\n",
              " '##tic',\n",
              " 'treatment',\n",
              " 'was',\n",
              " 'decided',\n",
              " 'by',\n",
              " 'the',\n",
              " 'on',\n",
              " '##cology',\n",
              " 'department',\n",
              " ',',\n",
              " 'and',\n",
              " 'the',\n",
              " 'patient',\n",
              " 'died',\n",
              " ',',\n",
              " 'but',\n",
              " 'without',\n",
              " 're',\n",
              " '##cu',\n",
              " '##rren',\n",
              " '##ce',\n",
              " 'of',\n",
              " 'the',\n",
              " 'rector',\n",
              " '##r',\n",
              " '##ha',\n",
              " '##gia',\n",
              " '.',\n",
              " '[SEP]']"
            ]
          },
          "metadata": {},
          "execution_count": 91
        }
      ],
      "source": [
        "inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
        "inputs.tokens()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 92,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EQskMV19KpLh",
        "outputId": "9fbf1919-5853-4cd7-ea80-475edeb3b482"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
          ]
        }
      ],
      "source": [
        "labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "word_ids = inputs.word_ids()\n",
        "print(labels)\n",
        "print(align_labels_with_tokens(labels, word_ids))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 93,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 81,
          "referenced_widgets": [
            "84631a71d2ca4ae8a781019ea3ce6da9",
            "9666c2ff4f32449ea3cbef076a166836",
            "efc4af4547804d9daae235691942e73a",
            "c8194cd34f554a789359eab6e7596291",
            "54f649d297ec4456be5b5df14497fb93",
            "e35bc6f834c4490e85ac2ae25d9e922f",
            "b166c61328bb49ea803f0d3a7d515d81",
            "a2aac740ef3b4f3c913b71c82b408c2c",
            "d77e188e43dc4e01b82054f2a6a8e832",
            "266b2c90bb4d41198784f016e996066a",
            "cc62b20cee8c4a4b8e24576d1c854fbf",
            "98bebe04cb254369bb3b6b991d4b2648",
            "40080df663cc43749963657150cf632d",
            "a04f157a98db4d47b75094b6ef1b0990",
            "ba2967950f4c483ea399827046f52963",
            "a9e0ad6a141a462fb9bea1c18d447332",
            "31fbaf0ffb0845f5800e6fca0353b929",
            "4a2c17e757d34547a4a68718ef064073",
            "f74d219071ab49479194f1061bf343be",
            "a92097360dba4d5c848b48e345b0028e",
            "24a164b22a8f4e00944ef05bcec5d032",
            "976ade0b37cd43e2aa5aa272dac2445b"
          ]
        },
        "id": "z_6q0eitKpLh",
        "outputId": "4c9c0ce3-5855-49e3-c4da-bbe068361c63"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/1 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "84631a71d2ca4ae8a781019ea3ce6da9"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/1 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "98bebe04cb254369bb3b6b991d4b2648"
            }
          },
          "metadata": {}
        }
      ],
      "source": [
        "tokenized_datasets = raw_datasets.map(\n",
        "    tokenize_and_align_labels,\n",
        "    batched=True,\n",
        "    remove_columns=raw_datasets[\"train\"].column_names,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 94,
      "metadata": {
        "id": "K3GGBsIIKpLh"
      },
      "outputs": [],
      "source": [
        "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 95,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1M1bHT_pKpLi",
        "outputId": "19fc29d4-cd88-4fe7-e963-71a59d0434ca"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[-100,    0,    0,  ...,    0,    0, -100],\n",
              "        [-100,    0,    0,  ..., -100, -100, -100]])"
            ]
          },
          "metadata": {},
          "execution_count": 95
        }
      ],
      "source": [
        "batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
        "batch[\"labels\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 96,
      "metadata": {
        "id": "maAO4nNXKpLi"
      },
      "outputs": [],
      "source": [
        "id2label = {str(i): label for i, label in enumerate(label_names)}\n",
        "label2id = {v: k for k, v in id2label.items()}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 97,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 954,
          "referenced_widgets": [
            "21b02caa5dc146b8ac2bd1a282381c7f",
            "0c01b39e34744f74ae47d0d4e70638ce",
            "82e7357418144359abba3548449c0c08",
            "2ff5e18b6d684b99a82676dbf3db6d32",
            "cd75e771337843d9b55838502bed9a1b",
            "b6a4250c705f4dd8b9f52731cce2a23d",
            "4549eb0838864025ac6a0f3da9192818",
            "b2d377844c1a4bc09433a94088f5213e",
            "fe7e058b9a6944969d83f7e72b398bb1",
            "47d5ccd1eafe4ea1a3476e06d998bd74",
            "5a17027205cb4c2bbe140e1e96e4b495"
          ]
        },
        "id": "Q0T8WOGBKpLi",
        "outputId": "d950423b-480b-495e-f02e-667ec124dcb4"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading:   0%|          | 0.00/709M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "21b02caa5dc146b8ac2bd1a282381c7f"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/pytorch_model.bin\n",
            "All model checkpoint weights were used when initializing BertForTokenClassification.\n",
            "\n",
            "All the weights of BertForTokenClassification were initialized from the model checkpoint at pucpr/clinicalnerpt-medical.\n",
            "If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForTokenClassification for predictions without further training.\n"
          ]
        }
      ],
      "source": [
        "model = AutoModelForTokenClassification.from_pretrained(    \n",
        "    model_checkpoint\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 98,
      "metadata": {
        "id": "k6ExcF0UKpLi",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "68661940-56bd-4adc-ec64-37f1db50604c"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "PyTorch: setting up devices\n",
            "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
          ]
        }
      ],
      "source": [
        "args = TrainingArguments(\n",
        "    \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
        "    evaluation_strategy = IntervalStrategy.STEPS,\n",
        "    eval_steps = 50,\n",
        "    learning_rate=5e-5,\n",
        "    num_train_epochs=50,\n",
        "    weight_decay=0.01,\n",
        "    metric_for_best_model = 'f1',\n",
        "   load_best_model_at_end=True\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 99,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "gQSIB3FfKpLj",
        "outputId": "72ede653-dc87-4230-94b2-805a52f1f5d2"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:310: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
            "  FutureWarning,\n",
            "***** Running training *****\n",
            "  Num examples = 555\n",
            "  Num Epochs = 50\n",
            "  Instantaneous batch size per device = 8\n",
            "  Total train batch size (w. parallel, distributed & accumulation) = 8\n",
            "  Gradient Accumulation steps = 1\n",
            "  Total optimization steps = 3500\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='1150' max='3500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [1150/3500 05:32 < 11:20, 3.45 it/s, Epoch 16/50]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Step</th>\n",
              "      <th>Training Loss</th>\n",
              "      <th>Validation Loss</th>\n",
              "      <th>Precision</th>\n",
              "      <th>Recall</th>\n",
              "      <th>F1</th>\n",
              "      <th>Accuracy</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>50</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.160077</td>\n",
              "      <td>0.281675</td>\n",
              "      <td>0.325786</td>\n",
              "      <td>0.302129</td>\n",
              "      <td>0.934798</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>100</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.156098</td>\n",
              "      <td>0.440468</td>\n",
              "      <td>0.425786</td>\n",
              "      <td>0.433003</td>\n",
              "      <td>0.941619</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>150</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.175201</td>\n",
              "      <td>0.430540</td>\n",
              "      <td>0.491195</td>\n",
              "      <td>0.458872</td>\n",
              "      <td>0.943305</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>200</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.181505</td>\n",
              "      <td>0.465023</td>\n",
              "      <td>0.438994</td>\n",
              "      <td>0.451634</td>\n",
              "      <td>0.944750</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>250</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.198468</td>\n",
              "      <td>0.437112</td>\n",
              "      <td>0.487421</td>\n",
              "      <td>0.460898</td>\n",
              "      <td>0.942113</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>300</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.213199</td>\n",
              "      <td>0.454245</td>\n",
              "      <td>0.518239</td>\n",
              "      <td>0.484136</td>\n",
              "      <td>0.943191</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>350</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.225870</td>\n",
              "      <td>0.439457</td>\n",
              "      <td>0.529560</td>\n",
              "      <td>0.480319</td>\n",
              "      <td>0.941974</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>400</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.252911</td>\n",
              "      <td>0.509901</td>\n",
              "      <td>0.453459</td>\n",
              "      <td>0.480027</td>\n",
              "      <td>0.945802</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>450</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.248783</td>\n",
              "      <td>0.475821</td>\n",
              "      <td>0.501258</td>\n",
              "      <td>0.488208</td>\n",
              "      <td>0.945092</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>500</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.230288</td>\n",
              "      <td>0.441730</td>\n",
              "      <td>0.545912</td>\n",
              "      <td>0.488326</td>\n",
              "      <td>0.941631</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>550</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.257168</td>\n",
              "      <td>0.470556</td>\n",
              "      <td>0.537736</td>\n",
              "      <td>0.501908</td>\n",
              "      <td>0.944002</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>600</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.264445</td>\n",
              "      <td>0.509816</td>\n",
              "      <td>0.506289</td>\n",
              "      <td>0.508047</td>\n",
              "      <td>0.946145</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>650</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.277278</td>\n",
              "      <td>0.448800</td>\n",
              "      <td>0.576101</td>\n",
              "      <td>0.504544</td>\n",
              "      <td>0.943115</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>700</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.278563</td>\n",
              "      <td>0.504016</td>\n",
              "      <td>0.473585</td>\n",
              "      <td>0.488327</td>\n",
              "      <td>0.946145</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>750</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.295773</td>\n",
              "      <td>0.501239</td>\n",
              "      <td>0.508805</td>\n",
              "      <td>0.504994</td>\n",
              "      <td>0.946436</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>800</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.300880</td>\n",
              "      <td>0.491329</td>\n",
              "      <td>0.534591</td>\n",
              "      <td>0.512048</td>\n",
              "      <td>0.945333</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>850</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.315745</td>\n",
              "      <td>0.503695</td>\n",
              "      <td>0.514465</td>\n",
              "      <td>0.509023</td>\n",
              "      <td>0.944154</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>900</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.303611</td>\n",
              "      <td>0.461154</td>\n",
              "      <td>0.522642</td>\n",
              "      <td>0.489976</td>\n",
              "      <td>0.944839</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>950</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>0.309813</td>\n",
              "      <td>0.502151</td>\n",
              "      <td>0.513836</td>\n",
              "      <td>0.507927</td>\n",
              "      <td>0.947552</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1000</td>\n",
              "      <td>0.009800</td>\n",
              "      <td>0.335969</td>\n",
              "      <td>0.520309</td>\n",
              "      <td>0.507547</td>\n",
              "      <td>0.513849</td>\n",
              "      <td>0.946829</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1050</td>\n",
              "      <td>0.009800</td>\n",
              "      <td>0.313057</td>\n",
              "      <td>0.484645</td>\n",
              "      <td>0.545912</td>\n",
              "      <td>0.513458</td>\n",
              "      <td>0.946018</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1100</td>\n",
              "      <td>0.009800</td>\n",
              "      <td>0.297767</td>\n",
              "      <td>0.478187</td>\n",
              "      <td>0.530818</td>\n",
              "      <td>0.503130</td>\n",
              "      <td>0.947780</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1150</td>\n",
              "      <td>0.009800</td>\n",
              "      <td>0.317451</td>\n",
              "      <td>0.496936</td>\n",
              "      <td>0.510063</td>\n",
              "      <td>0.503414</td>\n",
              "      <td>0.946601</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 186\n",
            "  Batch size = 8\n",
            "\n",
            "\n",
            "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
            "\n",
            "\n",
            "Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000 (score: 0.5138490926456541).\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "TrainOutput(global_step=1150, training_loss=0.03881311048632083, metrics={'train_runtime': 332.8101, 'train_samples_per_second': 83.381, 'train_steps_per_second': 10.517, 'total_flos': 2382353890443360.0, 'train_loss': 0.03881311048632083, 'epoch': 16.43})"
            ]
          },
          "metadata": {},
          "execution_count": 99
        }
      ],
      "source": [
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    args=args,\n",
        "    train_dataset=tokenized_datasets[\"train\"],\n",
        "    eval_dataset=tokenized_datasets[\"validation\"],\n",
        "    data_collator=data_collator,\n",
        "    compute_metrics=compute_metrics,\n",
        "    tokenizer=tokenizer,\n",
        "    callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
        ")\n",
        "trainer.train()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 100,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "arJA0rVIKpLj",
        "outputId": "12b3b4d0-2f59-4f46-9f20-e2aa3cd35398"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Saving model checkpoint to model/multilingual-BERT-all-tokens\n",
            "Configuration saved in model/multilingual-BERT-all-tokens/config.json\n",
            "Model weights saved in model/multilingual-BERT-all-tokens/pytorch_model.bin\n",
            "tokenizer config file saved in model/multilingual-BERT-all-tokens/tokenizer_config.json\n",
            "Special tokens file saved in model/multilingual-BERT-all-tokens/special_tokens_map.json\n"
          ]
        }
      ],
      "source": [
        "trainer.save_model('model/multilingual-BERT-all-tokens')"
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "D3Bdj1H9TxOp"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "kCThHmWQWZ57"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Model 2 - pucpr/clinicalnerpt-medical\n",
        "\n",
        "### Sentence Based Modelling"
      ],
      "metadata": {
        "id": "5fN4MRRx_fFZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
      ],
      "metadata": {
        "id": "_E60UP1w_fFZ"
      },
      "execution_count": 101,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": 102,
      "metadata": {
        "id": "DY7gaYbj_fFa"
      },
      "outputs": [],
      "source": [
        "dataset = Dataset.from_dict(dic)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 103,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "44a9f0cc-c1cf-4064-aa89-baffa435cf52",
        "id": "FcLEVVcn_fFa"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 11668\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 103
        }
      ],
      "source": [
        "dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 104,
      "metadata": {
        "id": "vZqWyVo2_fFa"
      },
      "outputs": [],
      "source": [
        "#For training, validation, and test partitions\n",
        "\"\"\"\n",
        "#Train, val, test partitions\n",
        "train_test = dataset.train_test_split()\n",
        "test_val = train_test['test'].train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': test_val['train'],\n",
        "    'test': test_val['test']\n",
        "    })\n",
        "\"\"\"\n",
        "\n",
        "#Just for training and validation partitions\n",
        "train_test = dataset.train_test_split()\n",
        "raw_datasets = DatasetDict({\n",
        "    'train': train_test['train'],\n",
        "    'validation': train_test['test']\n",
        "    })"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 105,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ddadc450-b61a-43f9-e5fd-90c8db32de6b",
        "id": "GDv9hwpm_fFa"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "DatasetDict({\n",
              "    train: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 8751\n",
              "    })\n",
              "    validation: Dataset({\n",
              "        features: ['tokens', 'ner_tags'],\n",
              "        num_rows: 2917\n",
              "    })\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 105
        }
      ],
      "source": [
        "raw_datasets"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 106,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "54b152d5-edaa-482d-afb5-a209403c7dbc",
        "id": "ZnB_fXql_fFa"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
            ]
          },
          "metadata": {},
          "execution_count": 106
        }
      ],
      "source": [
        "raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#raw_datasets[\"train\"][0][\"chunk_tags\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 107,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a7beafe3-27ce-4e03-904a-3b8a8e06d831",
        "id": "egmgQAVt_fFa"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['tokens', 'ner_tags'],\n",
              "    num_rows: 8751\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 107
        }
      ],
      "source": [
        "raw_datasets['train']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 108,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e0cd0121-e53b-4d8c-9b33-2d093d604378",
        "id": "Hize89uK_fFa"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['O', 'B', 'I']"
            ]
          },
          "metadata": {},
          "execution_count": 108
        }
      ],
      "source": [
        "label_names = ['O','B','I']\n",
        "label_names"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 109,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a9fe31c8-45c9-4873-fb2c-cf24bd7c64a9",
        "id": "bQf54Sst_fFa"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Since 2006 she had tried several times to lose weight , without success . \n",
            "O     O    O   O   O     O       O     O  O    O      O O       O       O \n"
          ]
        }
      ],
      "source": [
        "words = raw_datasets[\"train\"][0][\"tokens\"]\n",
        "labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
        "#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
        "#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
        "line1 = \"\"\n",
        "line2 = \"\"\n",
        "for word, label in zip(words, labels):\n",
        "    full_label = label_names[label]\n",
        "    max_length = max(len(word), len(full_label))\n",
        "    line1 += word + \" \" * (max_length - len(word) + 1)\n",
        "    line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
        "\n",
        "print(line1)\n",
        "print(line2)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 110,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "53863be6-1112-4c14-b34b-529492b236dc",
        "id": "cNKxSKCT_fFb"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n",
            "loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/vocab.txt\n",
            "loading file tokenizer.json from cache at None\n",
            "loading file added_tokens.json from cache at None\n",
            "loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/special_tokens_map.json\n",
            "loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/tokenizer_config.json\n",
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n",
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n"
          ]
        }
      ],
      "source": [
        "model_checkpoint = \"pucpr/clinicalnerpt-medical\"\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 111,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1b408280-d763-4487-e0c0-06823ddc8daa",
        "id": "Gm0lsFPN_fFb"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 111
        }
      ],
      "source": [
        "tokenizer.is_fast"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 112,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a3d8f036-bf64-4f4d-e793-9244e9bd1043",
        "id": "5Awp28lM_fFb"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['[CLS]',\n",
              " 'since',\n",
              " '2006',\n",
              " 'she',\n",
              " 'had',\n",
              " 'tried',\n",
              " 'several',\n",
              " 'times',\n",
              " 'to',\n",
              " 'lose',\n",
              " 'weight',\n",
              " ',',\n",
              " 'without',\n",
              " 'success',\n",
              " '.',\n",
              " '[SEP]']"
            ]
          },
          "metadata": {},
          "execution_count": 112
        }
      ],
      "source": [
        "inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
        "inputs.tokens()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 113,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "65b0481e-76fe-4f0a-e7e6-daa6cee662d1",
        "id": "Oqi5M1ll_fFb"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
          ]
        }
      ],
      "source": [
        "labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
        "word_ids = inputs.word_ids()\n",
        "print(labels)\n",
        "print(align_labels_with_tokens(labels, word_ids))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 114,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 81,
          "referenced_widgets": [
            "49642b493a2d4c3592ed010663ef789c",
            "fcc50d75dfb04fccb26c9b93bf8f1efa",
            "dc630459e6564d69833d46b63493a160",
            "31d0648af26b4fe797f2cb2ff21336a8",
            "90e567bcc88445f695a896af6d8da649",
            "2e815baaae8940ffb90e2aaf5c6f7e2a",
            "32250602bfc140d18859e6b48f9dbfbc",
            "a85179eeb7d94bba8c79a10a734d8c6b",
            "7346605d4df548afb179d489217990ff",
            "6b1a06b7d6ea43da8b9af7f92a882455",
            "a1b4b4cc26ff4c50b4ef5c99d8c7cb3e",
            "7b3ce57b6b5e4253b8219adc2c6ff47e",
            "ee7bc9576ea44b97a6ad9d2ae8adaec7",
            "c012f774a21e44188365f5b0646b422e",
            "5899086ed64c4d3185374a7f541e22fe",
            "3aaf8990b0574a9183b9902eb33670a5",
            "cc3d6c6b95ab4c80983b3ce2175acb8a",
            "873c876ae8f64b9bba4f25efa3a3859a",
            "d727d1396df2443e9e46ab7e0c7d5276",
            "cda8e993793c4b949910e309a3f50a03",
            "5f38c6b988b44678a7b7f06a99daa983",
            "7b53cf7c82a6439eb94d5f0635afe2f3"
          ]
        },
        "outputId": "774134e7-c481-40c4-df12-902897c28ad6",
        "id": "fVvvny2D_fFb"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/9 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "49642b493a2d4c3592ed010663ef789c"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/3 [00:00<?, ?ba/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "7b3ce57b6b5e4253b8219adc2c6ff47e"
            }
          },
          "metadata": {}
        }
      ],
      "source": [
        "tokenized_datasets = raw_datasets.map(\n",
        "    tokenize_and_align_labels,\n",
        "    batched=True,\n",
        "    remove_columns=raw_datasets[\"train\"].column_names,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 115,
      "metadata": {
        "id": "MKc1_p7W_fFb"
      },
      "outputs": [],
      "source": [
        "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 116,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e4e50bbf-a7bd-485b-834b-05949304f311",
        "id": "-u-nUkGH_fFb"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[-100,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
              "            0,    0,    0, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n",
              "         -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100],\n",
              "        [-100,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
              "            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
              "            0,    1,    2,    2,    2,    2,    2,    2,    2,    2,    0, -100]])"
            ]
          },
          "metadata": {},
          "execution_count": 116
        }
      ],
      "source": [
        "batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
        "batch[\"labels\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 117,
      "metadata": {
        "id": "_hW6-lz2_fFb"
      },
      "outputs": [],
      "source": [
        "id2label = {str(i): label for i, label in enumerate(label_names)}\n",
        "label2id = {v: k for k, v in id2label.items()}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 118,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4b8291fc-97e1-4d18-9713-7350ec2f075e",
        "id": "7zhu1mtB_fFb"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
            "Model config BertConfig {\n",
            "  \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
            "  \"_num_labels\": 3,\n",
            "  \"architectures\": [\n",
            "    \"BertForTokenClassification\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"classifier_dropout\": null,\n",
            "  \"directionality\": \"bidi\",\n",
            "  \"eos_token_ids\": null,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"id2label\": {\n",
            "    \"0\": \"O\",\n",
            "    \"1\": \"B-MedicalDevice\",\n",
            "    \"2\": \"I-MedicalDevice\"\n",
            "  },\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"label2id\": {\n",
            "    \"B-MedicalDevice\": 1,\n",
            "    \"I-MedicalDevice\": 2,\n",
            "    \"O\": 0\n",
            "  },\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"output_past\": true,\n",
            "  \"pad_token_id\": null,\n",
            "  \"pooler_fc_size\": 768,\n",
            "  \"pooler_num_attention_heads\": 12,\n",
            "  \"pooler_num_fc_layers\": 3,\n",
            "  \"pooler_size_per_head\": 128,\n",
            "  \"pooler_type\": \"first_token_transform\",\n",
            "  \"position_embedding_type\": \"absolute\",\n",
            "  \"transformers_version\": \"4.23.1\",\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 119547\n",
            "}\n",
            "\n",
            "loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/pytorch_model.bin\n",
            "All model checkpoint weights were used when initializing BertForTokenClassification.\n",
            "\n",
            "All the weights of BertForTokenClassification were initialized from the model checkpoint at pucpr/clinicalnerpt-medical.\n",
            "If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForTokenClassification for predictions without further training.\n"
          ]
        }
      ],
      "source": [
        "model = AutoModelForTokenClassification.from_pretrained(    \n",
        "    model_checkpoint\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 119,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "7f9279a0-4c0a-41ea-c39c-caf895886d2a",
        "id": "-4pqS6QR_fFc"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "PyTorch: setting up devices\n",
            "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
          ]
        }
      ],
      "source": [
        "args = TrainingArguments(\n",
        "    \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
        "    evaluation_strategy = IntervalStrategy.STEPS,\n",
        "    eval_steps = 50,\n",
        "    learning_rate=5e-5,\n",
        "    num_train_epochs=50,\n",
        "    weight_decay=0.01,\n",
        "    metric_for_best_model = 'f1',\n",
        "   load_best_model_at_end=True\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 120,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "438ca67b-ebf6-49f3-f3ad-03827a51e196",
        "id": "Xrf-cYnW_fFc"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:310: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
            "  FutureWarning,\n",
            "***** Running training *****\n",
            "  Num examples = 8751\n",
            "  Num Epochs = 50\n",
            "  Instantaneous batch size per device = 8\n",
            "  Total train batch size (w. parallel, distributed & accumulation) = 8\n",
            "  Gradient Accumulation steps = 1\n",
            "  Total optimization steps = 54700\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='2750' max='54700' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [ 2750/54700 10:53 < 3:26:01, 4.20 it/s, Epoch 2/50]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Step</th>\n",
              "      <th>Training Loss</th>\n",
              "      <th>Validation Loss</th>\n",
              "      <th>Precision</th>\n",
              "      <th>Recall</th>\n",
              "      <th>F1</th>\n",
              "      <th>Accuracy</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>50</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.213414</td>\n",
              "      <td>0.235529</td>\n",
              "      <td>0.183135</td>\n",
              "      <td>0.206054</td>\n",
              "      <td>0.927267</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>100</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.185129</td>\n",
              "      <td>0.241594</td>\n",
              "      <td>0.200724</td>\n",
              "      <td>0.219271</td>\n",
              "      <td>0.927296</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>150</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.175822</td>\n",
              "      <td>0.340069</td>\n",
              "      <td>0.304708</td>\n",
              "      <td>0.321419</td>\n",
              "      <td>0.933018</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>200</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.175697</td>\n",
              "      <td>0.278355</td>\n",
              "      <td>0.332644</td>\n",
              "      <td>0.303087</td>\n",
              "      <td>0.932284</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>250</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.183824</td>\n",
              "      <td>0.331652</td>\n",
              "      <td>0.442318</td>\n",
              "      <td>0.379073</td>\n",
              "      <td>0.931647</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>300</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.197462</td>\n",
              "      <td>0.431220</td>\n",
              "      <td>0.228660</td>\n",
              "      <td>0.298851</td>\n",
              "      <td>0.932342</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>350</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.167895</td>\n",
              "      <td>0.386070</td>\n",
              "      <td>0.433006</td>\n",
              "      <td>0.408193</td>\n",
              "      <td>0.937046</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>400</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.190235</td>\n",
              "      <td>0.179594</td>\n",
              "      <td>0.201242</td>\n",
              "      <td>0.189802</td>\n",
              "      <td>0.930726</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>450</td>\n",
              "      <td>No log</td>\n",
              "      <td>0.182239</td>\n",
              "      <td>0.341333</td>\n",
              "      <td>0.463528</td>\n",
              "      <td>0.393155</td>\n",
              "      <td>0.936134</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>500</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.161157</td>\n",
              "      <td>0.377800</td>\n",
              "      <td>0.383859</td>\n",
              "      <td>0.380806</td>\n",
              "      <td>0.938074</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>550</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.158413</td>\n",
              "      <td>0.422494</td>\n",
              "      <td>0.375065</td>\n",
              "      <td>0.397369</td>\n",
              "      <td>0.940534</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>600</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.155527</td>\n",
              "      <td>0.395566</td>\n",
              "      <td>0.350750</td>\n",
              "      <td>0.371812</td>\n",
              "      <td>0.939897</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>650</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.159286</td>\n",
              "      <td>0.323269</td>\n",
              "      <td>0.321262</td>\n",
              "      <td>0.322263</td>\n",
              "      <td>0.938202</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>700</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.171928</td>\n",
              "      <td>0.381213</td>\n",
              "      <td>0.503880</td>\n",
              "      <td>0.434046</td>\n",
              "      <td>0.933910</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>750</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.147573</td>\n",
              "      <td>0.323820</td>\n",
              "      <td>0.305225</td>\n",
              "      <td>0.314248</td>\n",
              "      <td>0.942298</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>800</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.148027</td>\n",
              "      <td>0.425638</td>\n",
              "      <td>0.439731</td>\n",
              "      <td>0.432570</td>\n",
              "      <td>0.943326</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>850</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.147193</td>\n",
              "      <td>0.408436</td>\n",
              "      <td>0.395758</td>\n",
              "      <td>0.401997</td>\n",
              "      <td>0.943993</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>900</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.164249</td>\n",
              "      <td>0.404822</td>\n",
              "      <td>0.495085</td>\n",
              "      <td>0.445427</td>\n",
              "      <td>0.943277</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>950</td>\n",
              "      <td>0.197700</td>\n",
              "      <td>0.155629</td>\n",
              "      <td>0.408925</td>\n",
              "      <td>0.289188</td>\n",
              "      <td>0.338788</td>\n",
              "      <td>0.940769</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1000</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.185534</td>\n",
              "      <td>0.289720</td>\n",
              "      <td>0.080186</td>\n",
              "      <td>0.125608</td>\n",
              "      <td>0.925875</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1050</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.163427</td>\n",
              "      <td>0.402687</td>\n",
              "      <td>0.511640</td>\n",
              "      <td>0.450672</td>\n",
              "      <td>0.936379</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1100</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.148432</td>\n",
              "      <td>0.446659</td>\n",
              "      <td>0.428867</td>\n",
              "      <td>0.437582</td>\n",
              "      <td>0.944943</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1150</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.160113</td>\n",
              "      <td>0.457825</td>\n",
              "      <td>0.426798</td>\n",
              "      <td>0.441767</td>\n",
              "      <td>0.943307</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1200</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.167280</td>\n",
              "      <td>0.462996</td>\n",
              "      <td>0.265391</td>\n",
              "      <td>0.337389</td>\n",
              "      <td>0.939074</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1250</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.147611</td>\n",
              "      <td>0.443478</td>\n",
              "      <td>0.448526</td>\n",
              "      <td>0.445988</td>\n",
              "      <td>0.946579</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1300</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.147158</td>\n",
              "      <td>0.447046</td>\n",
              "      <td>0.434558</td>\n",
              "      <td>0.440714</td>\n",
              "      <td>0.945070</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1350</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.184782</td>\n",
              "      <td>0.432701</td>\n",
              "      <td>0.495603</td>\n",
              "      <td>0.462021</td>\n",
              "      <td>0.942092</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1400</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.164378</td>\n",
              "      <td>0.409772</td>\n",
              "      <td>0.381790</td>\n",
              "      <td>0.395287</td>\n",
              "      <td>0.944522</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1450</td>\n",
              "      <td>0.165700</td>\n",
              "      <td>0.161122</td>\n",
              "      <td>0.489035</td>\n",
              "      <td>0.461459</td>\n",
              "      <td>0.474847</td>\n",
              "      <td>0.946138</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1500</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.151911</td>\n",
              "      <td>0.395260</td>\n",
              "      <td>0.483187</td>\n",
              "      <td>0.434823</td>\n",
              "      <td>0.944914</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1550</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.160618</td>\n",
              "      <td>0.470175</td>\n",
              "      <td>0.485256</td>\n",
              "      <td>0.477597</td>\n",
              "      <td>0.945551</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1600</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.153152</td>\n",
              "      <td>0.458456</td>\n",
              "      <td>0.485256</td>\n",
              "      <td>0.471475</td>\n",
              "      <td>0.944796</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1650</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.159076</td>\n",
              "      <td>0.481323</td>\n",
              "      <td>0.406622</td>\n",
              "      <td>0.440830</td>\n",
              "      <td>0.946981</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1700</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.147369</td>\n",
              "      <td>0.451056</td>\n",
              "      <td>0.486291</td>\n",
              "      <td>0.468011</td>\n",
              "      <td>0.948882</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1750</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.157782</td>\n",
              "      <td>0.416149</td>\n",
              "      <td>0.554578</td>\n",
              "      <td>0.475493</td>\n",
              "      <td>0.942082</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1800</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.139047</td>\n",
              "      <td>0.469262</td>\n",
              "      <td>0.473875</td>\n",
              "      <td>0.471557</td>\n",
              "      <td>0.948186</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1850</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.169168</td>\n",
              "      <td>0.467036</td>\n",
              "      <td>0.392137</td>\n",
              "      <td>0.426322</td>\n",
              "      <td>0.947804</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1900</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.190142</td>\n",
              "      <td>0.451332</td>\n",
              "      <td>0.482152</td>\n",
              "      <td>0.466233</td>\n",
              "      <td>0.946511</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>1950</td>\n",
              "      <td>0.133400</td>\n",
              "      <td>0.142872</td>\n",
              "      <td>0.489979</td>\n",
              "      <td>0.480600</td>\n",
              "      <td>0.485244</td>\n",
              "      <td>0.948431</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2000</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.141305</td>\n",
              "      <td>0.490028</td>\n",
              "      <td>0.444904</td>\n",
              "      <td>0.466377</td>\n",
              "      <td>0.948823</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2050</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.154673</td>\n",
              "      <td>0.441645</td>\n",
              "      <td>0.516813</td>\n",
              "      <td>0.476281</td>\n",
              "      <td>0.944257</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2100</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.161482</td>\n",
              "      <td>0.559639</td>\n",
              "      <td>0.417486</td>\n",
              "      <td>0.478222</td>\n",
              "      <td>0.948402</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2150</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.155931</td>\n",
              "      <td>0.482567</td>\n",
              "      <td>0.494051</td>\n",
              "      <td>0.488241</td>\n",
              "      <td>0.946942</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2200</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.180343</td>\n",
              "      <td>0.503902</td>\n",
              "      <td>0.467667</td>\n",
              "      <td>0.485109</td>\n",
              "      <td>0.947011</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2250</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.173788</td>\n",
              "      <td>0.474372</td>\n",
              "      <td>0.469219</td>\n",
              "      <td>0.471782</td>\n",
              "      <td>0.945678</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2300</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.172453</td>\n",
              "      <td>0.415626</td>\n",
              "      <td>0.533885</td>\n",
              "      <td>0.467391</td>\n",
              "      <td>0.942180</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2350</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.177914</td>\n",
              "      <td>0.406309</td>\n",
              "      <td>0.553026</td>\n",
              "      <td>0.468449</td>\n",
              "      <td>0.935782</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2400</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.177702</td>\n",
              "      <td>0.452438</td>\n",
              "      <td>0.484739</td>\n",
              "      <td>0.468032</td>\n",
              "      <td>0.947794</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2450</td>\n",
              "      <td>0.127700</td>\n",
              "      <td>0.149222</td>\n",
              "      <td>0.504098</td>\n",
              "      <td>0.445422</td>\n",
              "      <td>0.472947</td>\n",
              "      <td>0.947657</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2500</td>\n",
              "      <td>0.106000</td>\n",
              "      <td>0.176770</td>\n",
              "      <td>0.487315</td>\n",
              "      <td>0.476979</td>\n",
              "      <td>0.482092</td>\n",
              "      <td>0.947148</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2550</td>\n",
              "      <td>0.106000</td>\n",
              "      <td>0.159687</td>\n",
              "      <td>0.442256</td>\n",
              "      <td>0.511123</td>\n",
              "      <td>0.474202</td>\n",
              "      <td>0.944669</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2600</td>\n",
              "      <td>0.106000</td>\n",
              "      <td>0.185752</td>\n",
              "      <td>0.529092</td>\n",
              "      <td>0.503363</td>\n",
              "      <td>0.515907</td>\n",
              "      <td>0.949421</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2650</td>\n",
              "      <td>0.106000</td>\n",
              "      <td>0.166756</td>\n",
              "      <td>0.449671</td>\n",
              "      <td>0.494568</td>\n",
              "      <td>0.471052</td>\n",
              "      <td>0.945943</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2700</td>\n",
              "      <td>0.106000</td>\n",
              "      <td>0.174073</td>\n",
              "      <td>0.432521</td>\n",
              "      <td>0.477496</td>\n",
              "      <td>0.453897</td>\n",
              "      <td>0.946609</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2750</td>\n",
              "      <td>0.106000</td>\n",
              "      <td>0.186960</td>\n",
              "      <td>0.526675</td>\n",
              "      <td>0.439214</td>\n",
              "      <td>0.478984</td>\n",
              "      <td>0.947981</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500\n",
            "Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/config.json\n",
            "Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/pytorch_model.bin\n",
            "tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/tokenizer_config.json\n",
            "Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/special_tokens_map.json\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "***** Running Evaluation *****\n",
            "  Num examples = 2917\n",
            "  Batch size = 8\n",
            "\n",
            "\n",
            "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
            "\n",
            "\n",
            "Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500 (score: 0.48209150326797384).\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "TrainOutput(global_step=2750, training_loss=0.14159858218106355, metrics={'train_runtime': 653.9792, 'train_samples_per_second': 669.058, 'train_steps_per_second': 83.642, 'total_flos': 861581789561556.0, 'train_loss': 0.14159858218106355, 'epoch': 2.51})"
            ]
          },
          "metadata": {},
          "execution_count": 120
        }
      ],
      "source": [
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    args=args,\n",
        "    train_dataset=tokenized_datasets[\"train\"],\n",
        "    eval_dataset=tokenized_datasets[\"validation\"],\n",
        "    data_collator=data_collator,\n",
        "    compute_metrics=compute_metrics,\n",
        "    tokenizer=tokenizer,\n",
        "    callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
        ")\n",
        "trainer.train()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 121,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "977619c0-4fc0-423f-f47a-b308c9fb1ecd",
        "id": "RdLDXL1K_fFc"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Saving model checkpoint to model/multilingual-BERT-sentence\n",
            "Configuration saved in model/multilingual-BERT-sentence/config.json\n",
            "Model weights saved in model/multilingual-BERT-sentence/pytorch_model.bin\n",
            "tokenizer config file saved in model/multilingual-BERT-sentence/tokenizer_config.json\n",
            "Special tokens file saved in model/multilingual-BERT-sentence/special_tokens_map.json\n"
          ]
        }
      ],
      "source": [
        "trainer.save_model('model/multilingual-BERT-sentence')"
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "kFOvQa4MCQdW"
      },
      "execution_count": 166,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.2"
    },
    "colab": {
      "provenance": [],
      "collapsed_sections": [],
      "machine_shape": "hm"
    },
    "accelerator": "GPU",
    "gpuClass": "premium",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "b3fe12ca95e84b198d16bdb4d20f9ad9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_1b7f8f1786394c01bad4a8589ad16513",
              "IPY_MODEL_70e437b3ba294189b4799c6607532ebd",
              "IPY_MODEL_0fb47d91dbf9497cac1ffc1c5dfd4519"
            ],
            "layout": "IPY_MODEL_9cfec0f21c0a459f9f5888c389a6a479"
          }
        },
        "1b7f8f1786394c01bad4a8589ad16513": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ef66098fb5f748eabe11abc3fe4ad54d",
            "placeholder": "​",
            "style": "IPY_MODEL_563d8b35192240be960bc08909984119",
            "value": "Downloading builder script: "
          }
        },
        "70e437b3ba294189b4799c6607532ebd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b94385d1423e47f5a9e2351bf873c3e0",
            "max": 2472,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_e1b6e7774bc94a87ad23fb53d6c9b985",
            "value": 2472
          }
        },
        "0fb47d91dbf9497cac1ffc1c5dfd4519": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a0b523772cf04a85b0ac000cc9a83c67",
            "placeholder": "​",
            "style": "IPY_MODEL_71a3f1b2112344ea81721e59cce14cec",
            "value": " 6.33k/? [00:00&lt;00:00, 198kB/s]"
          }
        },
        "9cfec0f21c0a459f9f5888c389a6a479": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef66098fb5f748eabe11abc3fe4ad54d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "563d8b35192240be960bc08909984119": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b94385d1423e47f5a9e2351bf873c3e0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e1b6e7774bc94a87ad23fb53d6c9b985": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "a0b523772cf04a85b0ac000cc9a83c67": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "71a3f1b2112344ea81721e59cce14cec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a275c54cbefb4438a3015080e8b57999": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_a4696c167a3247bd8fd0727e0556463a",
              "IPY_MODEL_afa79c37c031491da9e229c637d80cc4",
              "IPY_MODEL_5ddf799b2fd94edc9949d36450a2d5e9"
            ],
            "layout": "IPY_MODEL_592af11564074af19e40bce6680ed7f1"
          }
        },
        "a4696c167a3247bd8fd0727e0556463a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_125b61b8e80d4192a6f19d43ba4797dc",
            "placeholder": "​",
            "style": "IPY_MODEL_d9ff16ada2d94eb7a1adc70e5265ff2d",
            "value": "Downloading: 100%"
          }
        },
        "afa79c37c031491da9e229c637d80cc4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f9bd10de9e2845f08100a29293b92d1c",
            "max": 373,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_c28349dbeccc4124b583e0eeec004e6b",
            "value": 373
          }
        },
        "5ddf799b2fd94edc9949d36450a2d5e9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ff2cf349b07442bd9812dd8c7e82e59b",
            "placeholder": "​",
            "style": "IPY_MODEL_414d466fed0b42378d8b38f10c720eba",
            "value": " 373/373 [00:00&lt;00:00, 10.0kB/s]"
          }
        },
        "592af11564074af19e40bce6680ed7f1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "125b61b8e80d4192a6f19d43ba4797dc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d9ff16ada2d94eb7a1adc70e5265ff2d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f9bd10de9e2845f08100a29293b92d1c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c28349dbeccc4124b583e0eeec004e6b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "ff2cf349b07442bd9812dd8c7e82e59b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "414d466fed0b42378d8b38f10c720eba": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e432f1e3e5c54358a321a21e9c7aad1f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_cf931d70dc1a4d2ba5f10dba7bf90ece",
              "IPY_MODEL_58f1edc459ef4f5bab25544474897db3",
              "IPY_MODEL_8894005504364c36964d283cf58bb223"
            ],
            "layout": "IPY_MODEL_78db41a453ce4ff4884960c615147331"
          }
        },
        "cf931d70dc1a4d2ba5f10dba7bf90ece": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fe8d877f0fc1417baad9838094045475",
            "placeholder": "​",
            "style": "IPY_MODEL_74bfdb85ed55436f8c12bf9b25375533",
            "value": "Downloading: 100%"
          }
        },
        "58f1edc459ef4f5bab25544474897db3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e813e2a1cb7248b7a8c404d55e4fb248",
            "max": 231508,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_cb3c438fb3a6412d80b5ba673a6455cb",
            "value": 231508
          }
        },
        "8894005504364c36964d283cf58bb223": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bb189f5bc189462cad4824a1c30335c0",
            "placeholder": "​",
            "style": "IPY_MODEL_e98b7218049f4310951a1608c52c14e0",
            "value": " 232k/232k [00:00&lt;00:00, 2.99MB/s]"
          }
        },
        "78db41a453ce4ff4884960c615147331": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fe8d877f0fc1417baad9838094045475": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "74bfdb85ed55436f8c12bf9b25375533": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e813e2a1cb7248b7a8c404d55e4fb248": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cb3c438fb3a6412d80b5ba673a6455cb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "bb189f5bc189462cad4824a1c30335c0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e98b7218049f4310951a1608c52c14e0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "3dd1f27ff0d24a1294534ff7e69a7abb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_4128d82e19f14e9d9be5416ebc974d0d",
              "IPY_MODEL_6676a80dc293456ea7aed4ce3e281d83",
              "IPY_MODEL_55a7a4c336884f26a53292d559a06ff8"
            ],
            "layout": "IPY_MODEL_d27469698b1e4ad1ae74ced6f7c3942d"
          }
        },
        "4128d82e19f14e9d9be5416ebc974d0d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d201490a05c049d38b087008aac0a400",
            "placeholder": "​",
            "style": "IPY_MODEL_703d715a4ef64c4e93cc6496f5340451",
            "value": "Downloading: 100%"
          }
        },
        "6676a80dc293456ea7aed4ce3e281d83": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_41d861058e3e458e949f1f3d92623217",
            "max": 711494,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f4d9343bd31d47b1b3dcf0494825be2d",
            "value": 711494
          }
        },
        "55a7a4c336884f26a53292d559a06ff8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b75e2a47db2b47dd8740f77b337c308f",
            "placeholder": "​",
            "style": "IPY_MODEL_d8849516ccb44011a7f9e7e745b30c60",
            "value": " 711k/711k [00:00&lt;00:00, 6.20MB/s]"
          }
        },
        "d27469698b1e4ad1ae74ced6f7c3942d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d201490a05c049d38b087008aac0a400": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "703d715a4ef64c4e93cc6496f5340451": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "41d861058e3e458e949f1f3d92623217": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f4d9343bd31d47b1b3dcf0494825be2d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "b75e2a47db2b47dd8740f77b337c308f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d8849516ccb44011a7f9e7e745b30c60": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ae5928c8da4243fba06ae9bf5086ba31": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e613455bcbb24e36a31666acd83d7b24",
              "IPY_MODEL_bac45a33f9b444a1985ef56a9be85c52",
              "IPY_MODEL_91ac0673e600400f904b1b10deb86cee"
            ],
            "layout": "IPY_MODEL_289f23dd30814993afde0f5e987fdd9e"
          }
        },
        "e613455bcbb24e36a31666acd83d7b24": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8e72912c0e434060ac30517a98d07a9e",
            "placeholder": "​",
            "style": "IPY_MODEL_ede252ab2cee4ffbbc2f5519373d1e97",
            "value": "Downloading: 100%"
          }
        },
        "bac45a33f9b444a1985ef56a9be85c52": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f2da21cc1007475ca0233a9e5d146d65",
            "max": 125,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_92fd7c43f87142d1bbd05f89ba3bfe39",
            "value": 125
          }
        },
        "91ac0673e600400f904b1b10deb86cee": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b58c9bfa30b3421496adb52e082cb50a",
            "placeholder": "​",
            "style": "IPY_MODEL_6cbfa925d26e47139365d10b9b28d96a",
            "value": " 125/125 [00:00&lt;00:00, 5.13kB/s]"
          }
        },
        "289f23dd30814993afde0f5e987fdd9e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8e72912c0e434060ac30517a98d07a9e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ede252ab2cee4ffbbc2f5519373d1e97": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f2da21cc1007475ca0233a9e5d146d65": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "92fd7c43f87142d1bbd05f89ba3bfe39": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "b58c9bfa30b3421496adb52e082cb50a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6cbfa925d26e47139365d10b9b28d96a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d161016f9fea41e6b27eb537c12d0703": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_f315bcfdc76848cb8e851a2698e0248b",
              "IPY_MODEL_ef152955607540f2a7d38bf9e2207eec",
              "IPY_MODEL_456c36425ac94dc294f8402c07668a51"
            ],
            "layout": "IPY_MODEL_4d305d32efdf4b639e65e816a7132597"
          }
        },
        "f315bcfdc76848cb8e851a2698e0248b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5c3a10b039c344509be9867ca40a8472",
            "placeholder": "​",
            "style": "IPY_MODEL_6663eac35b7a4043b97edb90a555e3d9",
            "value": "  0%"
          }
        },
        "ef152955607540f2a7d38bf9e2207eec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_30bce58edba74043abc1a2625c492d4a",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_d4b6dbbad9c946ed99b6c6e587bfb6da",
            "value": 0
          }
        },
        "456c36425ac94dc294f8402c07668a51": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8976a59e4ea049088f92a37f7547e16e",
            "placeholder": "​",
            "style": "IPY_MODEL_6a689955d9b3463abaaaa03b62d3cf69",
            "value": " 0/1 [00:00&lt;?, ?ba/s]"
          }
        },
        "4d305d32efdf4b639e65e816a7132597": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5c3a10b039c344509be9867ca40a8472": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6663eac35b7a4043b97edb90a555e3d9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "30bce58edba74043abc1a2625c492d4a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d4b6dbbad9c946ed99b6c6e587bfb6da": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "8976a59e4ea049088f92a37f7547e16e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6a689955d9b3463abaaaa03b62d3cf69": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9d65a59161cd401aad05f4a52d51c724": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_6f244b91a3884eb5b0fbd577ed5d1710",
              "IPY_MODEL_2b6ad660dd1f4c78855433118b9fb61e",
              "IPY_MODEL_101fa9a9581a46d8b1e0951f03796740"
            ],
            "layout": "IPY_MODEL_7ffe4378bc7b410780780dd51d0705ea"
          }
        },
        "6f244b91a3884eb5b0fbd577ed5d1710": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_77c422e831944566a6529da37645ef6d",
            "placeholder": "​",
            "style": "IPY_MODEL_9a88121d0138438980f1c7e4341f480a",
            "value": "  0%"
          }
        },
        "2b6ad660dd1f4c78855433118b9fb61e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_52600cdbf4804b148e02724ae4902de5",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_11f16a0c34e64d6494ac1d2550d18f8f",
            "value": 0
          }
        },
        "101fa9a9581a46d8b1e0951f03796740": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_645616ac236e479c8303a56100d26d51",
            "placeholder": "​",
            "style": "IPY_MODEL_859e35e323f0407fbdea9eb7ae953742",
            "value": " 0/1 [00:00&lt;?, ?ba/s]"
          }
        },
        "7ffe4378bc7b410780780dd51d0705ea": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "77c422e831944566a6529da37645ef6d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9a88121d0138438980f1c7e4341f480a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "52600cdbf4804b148e02724ae4902de5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "11f16a0c34e64d6494ac1d2550d18f8f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "645616ac236e479c8303a56100d26d51": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "859e35e323f0407fbdea9eb7ae953742": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "40c2b37fa07f44648cecc9b7e406e7e2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_69ed5fe8ed6046acb4202689c065f858",
              "IPY_MODEL_bbd9cf7a77aa48fda3a648583ed02b08",
              "IPY_MODEL_ebe8b2b35e884fd28bb42eacf01ff07c"
            ],
            "layout": "IPY_MODEL_ddb14bc1d5d4437a9ee4a895846e7d29"
          }
        },
        "69ed5fe8ed6046acb4202689c065f858": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_669c24c6309f46cbbdcd0c764143e74f",
            "placeholder": "​",
            "style": "IPY_MODEL_d1e4665beafa4bbeb25d0e9e8447a5a9",
            "value": " 89%"
          }
        },
        "bbd9cf7a77aa48fda3a648583ed02b08": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_09217bdc1e2145eb84cc97207595e6f0",
            "max": 9,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_b294af01ac5f483dacbe2e1c40fdf223",
            "value": 8
          }
        },
        "ebe8b2b35e884fd28bb42eacf01ff07c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c1e27e9184204d618ce59b97f7302335",
            "placeholder": "​",
            "style": "IPY_MODEL_2e63f2af443d448aaaddf81127def048",
            "value": " 8/9 [00:00&lt;00:00,  9.96ba/s]"
          }
        },
        "ddb14bc1d5d4437a9ee4a895846e7d29": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "669c24c6309f46cbbdcd0c764143e74f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d1e4665beafa4bbeb25d0e9e8447a5a9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "09217bdc1e2145eb84cc97207595e6f0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b294af01ac5f483dacbe2e1c40fdf223": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "c1e27e9184204d618ce59b97f7302335": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2e63f2af443d448aaaddf81127def048": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9239cc2fd1d94d86986b7f395de70fca": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_f1e8d31b67db4089ab1b036bda341617",
              "IPY_MODEL_33be40ebcab54ff68855f1145cf5e1d6",
              "IPY_MODEL_d96c111f09d74a0c9816328f88d9e45b"
            ],
            "layout": "IPY_MODEL_3907dc2aaa484877aee9beab8a6888d4"
          }
        },
        "f1e8d31b67db4089ab1b036bda341617": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_aaacfb0f3bd1427ea44ec84c28a2aaf7",
            "placeholder": "​",
            "style": "IPY_MODEL_cb92e843491142e8a2a4008223a90d02",
            "value": " 67%"
          }
        },
        "33be40ebcab54ff68855f1145cf5e1d6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_52169f264141463e94a7761a4ffb3f7a",
            "max": 3,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_e509b790873740b59aa2f52875ca2038",
            "value": 2
          }
        },
        "d96c111f09d74a0c9816328f88d9e45b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a395318bce7348d78ca83a308552f042",
            "placeholder": "​",
            "style": "IPY_MODEL_a2bb171f700743559e1d2c472c8289ef",
            "value": " 2/3 [00:00&lt;00:00,  8.13ba/s]"
          }
        },
        "3907dc2aaa484877aee9beab8a6888d4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "aaacfb0f3bd1427ea44ec84c28a2aaf7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cb92e843491142e8a2a4008223a90d02": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "52169f264141463e94a7761a4ffb3f7a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e509b790873740b59aa2f52875ca2038": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "a395318bce7348d78ca83a308552f042": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a2bb171f700743559e1d2c472c8289ef": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f0fc94c6df4c432f9e1edcfceaf44edd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_03f4216f904d4bf6a8e461c50f40378f",
              "IPY_MODEL_76fecb01ad6441a08ad0ddb989a8ab80",
              "IPY_MODEL_783edab3150d40a3bf99ed910cdbaf88"
            ],
            "layout": "IPY_MODEL_e33897e8fa4841308c55b19352876ab1"
          }
        },
        "03f4216f904d4bf6a8e461c50f40378f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_909d7d349769402a8c20b02b35eafbd9",
            "placeholder": "​",
            "style": "IPY_MODEL_6139e3d551044671a79e15e8adf351ab",
            "value": "Downloading: 100%"
          }
        },
        "76fecb01ad6441a08ad0ddb989a8ab80": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1c19ec799ccb4e788f34ed8ac37d495f",
            "max": 151,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_7a49ac5f18f64b41825378184f8c32ec",
            "value": 151
          }
        },
        "783edab3150d40a3bf99ed910cdbaf88": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_76ad2415389f41deb4f2810bca56b753",
            "placeholder": "​",
            "style": "IPY_MODEL_8c4bb02e55fa48429c8f86dac5cb369e",
            "value": " 151/151 [00:00&lt;00:00, 5.83kB/s]"
          }
        },
        "e33897e8fa4841308c55b19352876ab1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "909d7d349769402a8c20b02b35eafbd9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6139e3d551044671a79e15e8adf351ab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1c19ec799ccb4e788f34ed8ac37d495f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7a49ac5f18f64b41825378184f8c32ec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "76ad2415389f41deb4f2810bca56b753": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8c4bb02e55fa48429c8f86dac5cb369e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "980374f604ec4970b0afa70d108c864b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_baa9e8a9169a45efb8c117fdf4ea45b7",
              "IPY_MODEL_aa248b7d4eae4e5d965a7d04144adacc",
              "IPY_MODEL_311d8199627e4c4f83d02c167b5755f3"
            ],
            "layout": "IPY_MODEL_a299d926edbb4c51906b1be8f694d074"
          }
        },
        "baa9e8a9169a45efb8c117fdf4ea45b7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c76ebf5d6c6c46cd9d14591a47725ae1",
            "placeholder": "​",
            "style": "IPY_MODEL_178555439d854d30a01cac053adf9079",
            "value": "Downloading: 100%"
          }
        },
        "aa248b7d4eae4e5d965a7d04144adacc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_00c8d81110fe4f4bbcf77be4d20581c3",
            "max": 1055,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_43c18ef010014cb395f045dd26497fba",
            "value": 1055
          }
        },
        "311d8199627e4c4f83d02c167b5755f3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3b83d1f429d34e8e8de6ddff555df02d",
            "placeholder": "​",
            "style": "IPY_MODEL_91a8348ac2194686a9ef075f7d49687d",
            "value": " 1.05k/1.05k [00:00&lt;00:00, 26.5kB/s]"
          }
        },
        "a299d926edbb4c51906b1be8f694d074": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c76ebf5d6c6c46cd9d14591a47725ae1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "178555439d854d30a01cac053adf9079": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "00c8d81110fe4f4bbcf77be4d20581c3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "43c18ef010014cb395f045dd26497fba": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "3b83d1f429d34e8e8de6ddff555df02d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "91a8348ac2194686a9ef075f7d49687d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "42dcc74bff5440608a3e9f2fa580cd3c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_616c54d0cd534047b93b215e7baf2ba3",
              "IPY_MODEL_846ce38d6fb84279a7419091f2d269b0",
              "IPY_MODEL_1e15050772b54e34b014a98b9710c783"
            ],
            "layout": "IPY_MODEL_951f476862ea49619100e202a6e742f6"
          }
        },
        "616c54d0cd534047b93b215e7baf2ba3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f6e25547ba664cd59128536944a926fd",
            "placeholder": "​",
            "style": "IPY_MODEL_53185a2afedc41e0a680d5007656b90b",
            "value": "Downloading: 100%"
          }
        },
        "846ce38d6fb84279a7419091f2d269b0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_860f8204efae42d6ad2fa4eb9e661810",
            "max": 995526,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_e61ec283a9c04cc696d17bbe24ccf460",
            "value": 995526
          }
        },
        "1e15050772b54e34b014a98b9710c783": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_15abc268777a4e3cb5c3c7f430745c1d",
            "placeholder": "​",
            "style": "IPY_MODEL_e6c1266e8b074bdfafa0db6208743a07",
            "value": " 996k/996k [00:00&lt;00:00, 4.17MB/s]"
          }
        },
        "951f476862ea49619100e202a6e742f6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f6e25547ba664cd59128536944a926fd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "53185a2afedc41e0a680d5007656b90b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "860f8204efae42d6ad2fa4eb9e661810": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e61ec283a9c04cc696d17bbe24ccf460": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "15abc268777a4e3cb5c3c7f430745c1d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e6c1266e8b074bdfafa0db6208743a07": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "01a12f499b7942cc90f2032a8f3284e9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_804bf9b8a2154399a05dd0860f4dfd89",
              "IPY_MODEL_66fca4d7946240c3b08ba51fac82f2ae",
              "IPY_MODEL_6cefb3be5be9488ca033ed9908c6a8f5"
            ],
            "layout": "IPY_MODEL_2d0f2804db004da0914e4733ce96b749"
          }
        },
        "804bf9b8a2154399a05dd0860f4dfd89": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3d6b93cde5254ce99f19802b7c1146f4",
            "placeholder": "​",
            "style": "IPY_MODEL_838416bfbee0400299abff324c4825bc",
            "value": "Downloading: 100%"
          }
        },
        "66fca4d7946240c3b08ba51fac82f2ae": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f7584b77d97f4d48aa5b50bae2df49f8",
            "max": 112,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_0a5cd97f8a914ff89fd27aed7b38164d",
            "value": 112
          }
        },
        "6cefb3be5be9488ca033ed9908c6a8f5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ad4c308ebd574e909d31b161580b9064",
            "placeholder": "​",
            "style": "IPY_MODEL_74030c98fcb942ed9d9ffc43799113f0",
            "value": " 112/112 [00:00&lt;00:00, 4.51kB/s]"
          }
        },
        "2d0f2804db004da0914e4733ce96b749": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3d6b93cde5254ce99f19802b7c1146f4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "838416bfbee0400299abff324c4825bc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f7584b77d97f4d48aa5b50bae2df49f8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0a5cd97f8a914ff89fd27aed7b38164d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "ad4c308ebd574e909d31b161580b9064": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "74030c98fcb942ed9d9ffc43799113f0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "84631a71d2ca4ae8a781019ea3ce6da9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9666c2ff4f32449ea3cbef076a166836",
              "IPY_MODEL_efc4af4547804d9daae235691942e73a",
              "IPY_MODEL_c8194cd34f554a789359eab6e7596291"
            ],
            "layout": "IPY_MODEL_54f649d297ec4456be5b5df14497fb93"
          }
        },
        "9666c2ff4f32449ea3cbef076a166836": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e35bc6f834c4490e85ac2ae25d9e922f",
            "placeholder": "​",
            "style": "IPY_MODEL_b166c61328bb49ea803f0d3a7d515d81",
            "value": "  0%"
          }
        },
        "efc4af4547804d9daae235691942e73a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a2aac740ef3b4f3c913b71c82b408c2c",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_d77e188e43dc4e01b82054f2a6a8e832",
            "value": 0
          }
        },
        "c8194cd34f554a789359eab6e7596291": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_266b2c90bb4d41198784f016e996066a",
            "placeholder": "​",
            "style": "IPY_MODEL_cc62b20cee8c4a4b8e24576d1c854fbf",
            "value": " 0/1 [00:00&lt;?, ?ba/s]"
          }
        },
        "54f649d297ec4456be5b5df14497fb93": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e35bc6f834c4490e85ac2ae25d9e922f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b166c61328bb49ea803f0d3a7d515d81": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a2aac740ef3b4f3c913b71c82b408c2c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d77e188e43dc4e01b82054f2a6a8e832": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "266b2c90bb4d41198784f016e996066a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cc62b20cee8c4a4b8e24576d1c854fbf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "98bebe04cb254369bb3b6b991d4b2648": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_40080df663cc43749963657150cf632d",
              "IPY_MODEL_a04f157a98db4d47b75094b6ef1b0990",
              "IPY_MODEL_ba2967950f4c483ea399827046f52963"
            ],
            "layout": "IPY_MODEL_a9e0ad6a141a462fb9bea1c18d447332"
          }
        },
        "40080df663cc43749963657150cf632d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_31fbaf0ffb0845f5800e6fca0353b929",
            "placeholder": "​",
            "style": "IPY_MODEL_4a2c17e757d34547a4a68718ef064073",
            "value": "  0%"
          }
        },
        "a04f157a98db4d47b75094b6ef1b0990": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f74d219071ab49479194f1061bf343be",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_a92097360dba4d5c848b48e345b0028e",
            "value": 0
          }
        },
        "ba2967950f4c483ea399827046f52963": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_24a164b22a8f4e00944ef05bcec5d032",
            "placeholder": "​",
            "style": "IPY_MODEL_976ade0b37cd43e2aa5aa272dac2445b",
            "value": " 0/1 [00:00&lt;?, ?ba/s]"
          }
        },
        "a9e0ad6a141a462fb9bea1c18d447332": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "31fbaf0ffb0845f5800e6fca0353b929": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4a2c17e757d34547a4a68718ef064073": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f74d219071ab49479194f1061bf343be": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a92097360dba4d5c848b48e345b0028e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "24a164b22a8f4e00944ef05bcec5d032": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "976ade0b37cd43e2aa5aa272dac2445b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "21b02caa5dc146b8ac2bd1a282381c7f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_0c01b39e34744f74ae47d0d4e70638ce",
              "IPY_MODEL_82e7357418144359abba3548449c0c08",
              "IPY_MODEL_2ff5e18b6d684b99a82676dbf3db6d32"
            ],
            "layout": "IPY_MODEL_cd75e771337843d9b55838502bed9a1b"
          }
        },
        "0c01b39e34744f74ae47d0d4e70638ce": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b6a4250c705f4dd8b9f52731cce2a23d",
            "placeholder": "​",
            "style": "IPY_MODEL_4549eb0838864025ac6a0f3da9192818",
            "value": "Downloading: 100%"
          }
        },
        "82e7357418144359abba3548449c0c08": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b2d377844c1a4bc09433a94088f5213e",
            "max": 709144049,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_fe7e058b9a6944969d83f7e72b398bb1",
            "value": 709144049
          }
        },
        "2ff5e18b6d684b99a82676dbf3db6d32": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_47d5ccd1eafe4ea1a3476e06d998bd74",
            "placeholder": "​",
            "style": "IPY_MODEL_5a17027205cb4c2bbe140e1e96e4b495",
            "value": " 709M/709M [00:11&lt;00:00, 62.7MB/s]"
          }
        },
        "cd75e771337843d9b55838502bed9a1b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b6a4250c705f4dd8b9f52731cce2a23d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4549eb0838864025ac6a0f3da9192818": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b2d377844c1a4bc09433a94088f5213e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fe7e058b9a6944969d83f7e72b398bb1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "47d5ccd1eafe4ea1a3476e06d998bd74": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5a17027205cb4c2bbe140e1e96e4b495": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "49642b493a2d4c3592ed010663ef789c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_fcc50d75dfb04fccb26c9b93bf8f1efa",
              "IPY_MODEL_dc630459e6564d69833d46b63493a160",
              "IPY_MODEL_31d0648af26b4fe797f2cb2ff21336a8"
            ],
            "layout": "IPY_MODEL_90e567bcc88445f695a896af6d8da649"
          }
        },
        "fcc50d75dfb04fccb26c9b93bf8f1efa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2e815baaae8940ffb90e2aaf5c6f7e2a",
            "placeholder": "​",
            "style": "IPY_MODEL_32250602bfc140d18859e6b48f9dbfbc",
            "value": " 89%"
          }
        },
        "dc630459e6564d69833d46b63493a160": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a85179eeb7d94bba8c79a10a734d8c6b",
            "max": 9,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_7346605d4df548afb179d489217990ff",
            "value": 8
          }
        },
        "31d0648af26b4fe797f2cb2ff21336a8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6b1a06b7d6ea43da8b9af7f92a882455",
            "placeholder": "​",
            "style": "IPY_MODEL_a1b4b4cc26ff4c50b4ef5c99d8c7cb3e",
            "value": " 8/9 [00:00&lt;00:00,  9.55ba/s]"
          }
        },
        "90e567bcc88445f695a896af6d8da649": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2e815baaae8940ffb90e2aaf5c6f7e2a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "32250602bfc140d18859e6b48f9dbfbc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a85179eeb7d94bba8c79a10a734d8c6b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7346605d4df548afb179d489217990ff": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "6b1a06b7d6ea43da8b9af7f92a882455": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a1b4b4cc26ff4c50b4ef5c99d8c7cb3e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7b3ce57b6b5e4253b8219adc2c6ff47e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ee7bc9576ea44b97a6ad9d2ae8adaec7",
              "IPY_MODEL_c012f774a21e44188365f5b0646b422e",
              "IPY_MODEL_5899086ed64c4d3185374a7f541e22fe"
            ],
            "layout": "IPY_MODEL_3aaf8990b0574a9183b9902eb33670a5"
          }
        },
        "ee7bc9576ea44b97a6ad9d2ae8adaec7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_cc3d6c6b95ab4c80983b3ce2175acb8a",
            "placeholder": "​",
            "style": "IPY_MODEL_873c876ae8f64b9bba4f25efa3a3859a",
            "value": " 67%"
          }
        },
        "c012f774a21e44188365f5b0646b422e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "danger",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d727d1396df2443e9e46ab7e0c7d5276",
            "max": 3,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_cda8e993793c4b949910e309a3f50a03",
            "value": 2
          }
        },
        "5899086ed64c4d3185374a7f541e22fe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5f38c6b988b44678a7b7f06a99daa983",
            "placeholder": "​",
            "style": "IPY_MODEL_7b53cf7c82a6439eb94d5f0635afe2f3",
            "value": " 2/3 [00:00&lt;00:00,  7.76ba/s]"
          }
        },
        "3aaf8990b0574a9183b9902eb33670a5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cc3d6c6b95ab4c80983b3ce2175acb8a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "873c876ae8f64b9bba4f25efa3a3859a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d727d1396df2443e9e46ab7e0c7d5276": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cda8e993793c4b949910e309a3f50a03": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "5f38c6b988b44678a7b7f06a99daa983": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7b53cf7c82a6439eb94d5f0635afe2f3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}